diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -376,7 +376,7 @@
            [], name#"\t${off}(${addr})$p2align, $vec, $idx",
            name#"\t$off$p2align, $idx", simdop>;
   defm STORE_LANE_#vec#_A64 :
-    SIMD_I<(outs V128:$dst),
+    SIMD_I<(outs),
            (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx,
                 I64:$addr, V128:$vec),
            (outs), (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx),
diff --git a/llvm/test/CodeGen/WebAssembly/pr59626.ll b/llvm/test/CodeGen/WebAssembly/pr59626.ll
--- a/llvm/test/CodeGen/WebAssembly/pr59626.ll
+++ b/llvm/test/CodeGen/WebAssembly/pr59626.ll
@@ -34,7 +34,6 @@
 ; CHECK-64-NEXT: local.get 2
 ; CHECK-64-NEXT: i8x16.splat
 ; CHECK-64-NEXT: v128.store16_lane 0, 0
-; CHECK-64-NEXT: drop
 ; CHECK-64-NEXT: v128.const 0, 0
 ; CHECK-64-NEXT: i32x4.extract_lane 0
 ; CHECK-64-NEXT: # fallthrough-return
diff --git a/llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll b/llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll
--- a/llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll
@@ -1,13 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 -mtriple=wasm32-unknown-unknown | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 -mtriple=wasm64-unknown-unknown | FileCheck %s --check-prefix MEM64

 ; Test SIMD v128.load{8,16,32,64}_lane instructions.

 ; TODO: Use the offset field by supporting more patterns. Right now only the
 ; equivalents of LoadPatNoOffset/StorePatNoOffset are supported.
-target triple = "wasm32-unknown-unknown" - ;===---------------------------------------------------------------------------- ; v128.load8_lane / v128.store8_lane ;===---------------------------------------------------------------------------- @@ -20,6 +19,14 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load8_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i8_no_offset: +; MEM64: .functype load_lane_i8_no_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load8_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %x = load i8, ptr %p %t = insertelement <16 x i8> %v, i8 %x, i32 0 ret <16 x i8> %t @@ -35,6 +42,18 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load8_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i8_with_folded_offset: +; MEM64: .functype load_lane_i8_with_folded_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i32.wrap_i64 +; MEM64-NEXT: i32.const 24 +; MEM64-NEXT: i32.add +; MEM64-NEXT: i64.extend_i32_u +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load8_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %q = ptrtoint ptr %p to i32 %r = add nuw i32 %q, 24 %s = inttoptr i32 %r to ptr @@ -53,6 +72,16 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load8_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i8_with_folded_gep_offset: +; MEM64: .functype load_lane_i8_with_folded_gep_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i64.const 6 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load8_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr inbounds i8, ptr %p, i32 6 %x = load i8, ptr %s %t = insertelement <16 x i8> %v, i8 %x, i32 0 @@ -69,6 +98,16 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load8_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i8_with_unfolded_gep_negative_offset: +; MEM64: .functype load_lane_i8_with_unfolded_gep_negative_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i64.const -6 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load8_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr inbounds i8, ptr %p, i32 -6 %x = load i8, ptr %s %t = insertelement <16 x i8> %v, i8 %x, i32 0 @@ -85,6 +124,18 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load8_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i8_with_unfolded_offset: +; MEM64: .functype load_lane_i8_with_unfolded_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i32.wrap_i64 +; MEM64-NEXT: i32.const 24 +; MEM64-NEXT: i32.add +; MEM64-NEXT: i64.extend_i32_u +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load8_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %q = ptrtoint ptr %p to i32 %r = add nsw i32 %q, 24 %s = inttoptr i32 %r to ptr @@ -103,6 +154,16 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load8_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i8_with_unfolded_gep_offset: +; MEM64: .functype load_lane_i8_with_unfolded_gep_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i64.const 6 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load8_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr i8, ptr %p, i32 6 %x = load i8, ptr %s %t = insertelement <16 x 
i8> %v, i8 %x, i32 0 @@ -117,6 +178,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.load8_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i8_from_numeric_address: +; MEM64: .functype load_lane_i8_from_numeric_address (v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: i64.const 42 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.load8_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = inttoptr i32 42 to ptr %x = load i8, ptr %s %t = insertelement <16 x i8> %v, i8 %x, i32 0 @@ -132,6 +201,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.load8_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i8_from_global_address: +; MEM64: .functype load_lane_i8_from_global_address (v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: i64.const gv_i8 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.load8_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %x = load i8, ptr @gv_i8 %t = insertelement <16 x i8> %v, i8 %x, i32 0 ret <16 x i8> %t @@ -145,6 +222,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store8_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i8_no_offset: +; MEM64: .functype store_lane_i8_no_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store8_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %x = extractelement <16 x i8> %v, i32 0 store i8 %x, ptr %p ret void @@ -158,6 +243,18 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store8_lane 24, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i8_with_folded_offset: +; MEM64: .functype store_lane_i8_with_folded_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: i32.wrap_i64 +; MEM64-NEXT: i32.const 24 +; MEM64-NEXT: i32.add +; MEM64-NEXT: i64.extend_i32_u +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store8_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %q = ptrtoint ptr %p to i32 %r = add nuw i32 %q, 24 %s = inttoptr i32 %r to ptr @@ -174,6 +271,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store8_lane 6, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i8_with_folded_gep_offset: +; MEM64: .functype store_lane_i8_with_folded_gep_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store8_lane 6, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr inbounds i8, ptr %p, i32 6 %x = extractelement <16 x i8> %v, i32 0 store i8 %x, ptr %s @@ -190,6 +295,16 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store8_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i8_with_unfolded_gep_negative_offset: +; MEM64: .functype store_lane_i8_with_unfolded_gep_negative_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: i64.const -6 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store8_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr inbounds i8, ptr %p, i32 -6 %x = extractelement <16 x i8> %v, i32 0 store i8 %x, ptr %s @@ -206,6 +321,18 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store8_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i8_with_unfolded_offset: +; MEM64: .functype store_lane_i8_with_unfolded_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: i32.wrap_i64 +; MEM64-NEXT: i32.const 24 +; MEM64-NEXT: i32.add +; MEM64-NEXT: i64.extend_i32_u +; MEM64-NEXT: local.get 0 +; 
MEM64-NEXT: v128.store8_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %q = ptrtoint ptr %p to i32 %r = add nsw i32 %q, 24 %s = inttoptr i32 %r to ptr @@ -224,6 +351,16 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store8_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i8_with_unfolded_gep_offset: +; MEM64: .functype store_lane_i8_with_unfolded_gep_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: i64.const 6 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store8_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr i8, ptr %p, i32 6 %x = extractelement <16 x i8> %v, i32 0 store i8 %x, ptr %s @@ -238,6 +375,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store8_lane 42, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i8_to_numeric_address: +; MEM64: .functype store_lane_i8_to_numeric_address (v128) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: i64.const 0 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store8_lane 42, 0 +; MEM64-NEXT: # fallthrough-return %s = inttoptr i32 42 to ptr %x = extractelement <16 x i8> %v, i32 0 store i8 %x, ptr %s @@ -252,6 +397,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store8_lane gv_i8, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i8_from_global_address: +; MEM64: .functype store_lane_i8_from_global_address (v128) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: i64.const 0 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store8_lane gv_i8, 0 +; MEM64-NEXT: # fallthrough-return %x = extractelement <16 x i8> %v, i32 0 store i8 %x, ptr @gv_i8 ret void @@ -269,6 +422,14 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load16_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i16_no_offset: +; MEM64: .functype load_lane_i16_no_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load16_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %x = load i16, ptr %p %t = insertelement <8 x i16> %v, i16 %x, i32 0 ret <8 x i16> %t @@ -284,6 +445,18 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load16_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i16_with_folded_offset: +; MEM64: .functype load_lane_i16_with_folded_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i32.wrap_i64 +; MEM64-NEXT: i32.const 24 +; MEM64-NEXT: i32.add +; MEM64-NEXT: i64.extend_i32_u +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load16_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %q = ptrtoint ptr %p to i32 %r = add nuw i32 %q, 24 %s = inttoptr i32 %r to ptr @@ -302,6 +475,16 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load16_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i16_with_folded_gep_offset: +; MEM64: .functype load_lane_i16_with_folded_gep_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i64.const 12 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load16_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr inbounds i16, ptr %p, i32 6 %x = load i16, ptr %s %t = insertelement <8 x i16> %v, i16 %x, i32 0 @@ -318,6 +501,16 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load16_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i16_with_unfolded_gep_negative_offset: +; MEM64: .functype load_lane_i16_with_unfolded_gep_negative_offset (i64, v128) -> (v128) +; 
MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i64.const -12 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load16_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr inbounds i16, ptr %p, i32 -6 %x = load i16, ptr %s %t = insertelement <8 x i16> %v, i16 %x, i32 0 @@ -334,6 +527,18 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load16_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i16_with_unfolded_offset: +; MEM64: .functype load_lane_i16_with_unfolded_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i32.wrap_i64 +; MEM64-NEXT: i32.const 24 +; MEM64-NEXT: i32.add +; MEM64-NEXT: i64.extend_i32_u +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load16_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %q = ptrtoint ptr %p to i32 %r = add nsw i32 %q, 24 %s = inttoptr i32 %r to ptr @@ -352,6 +557,16 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load16_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i16_with_unfolded_gep_offset: +; MEM64: .functype load_lane_i16_with_unfolded_gep_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i64.const 12 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load16_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr i16, ptr %p, i32 6 %x = load i16, ptr %s %t = insertelement <8 x i16> %v, i16 %x, i32 0 @@ -366,6 +581,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.load16_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i16_from_numeric_address: +; MEM64: .functype load_lane_i16_from_numeric_address (v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: i64.const 42 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.load16_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = inttoptr i32 42 to ptr %x = load i16, ptr %s %t = insertelement <8 x i16> %v, i16 %x, i32 0 @@ -381,6 +604,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.load16_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i16_from_global_address: +; MEM64: .functype load_lane_i16_from_global_address (v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: i64.const gv_i16 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.load16_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %x = load i16, ptr @gv_i16 %t = insertelement <8 x i16> %v, i16 %x, i32 0 ret <8 x i16> %t @@ -394,6 +625,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store16_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i16_no_offset: +; MEM64: .functype store_lane_i16_no_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store16_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %x = extractelement <8 x i16> %v, i32 0 store i16 %x, ptr %p ret void @@ -407,6 +646,18 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store16_lane 24, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i16_with_folded_offset: +; MEM64: .functype store_lane_i16_with_folded_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: i32.wrap_i64 +; MEM64-NEXT: i32.const 24 +; MEM64-NEXT: i32.add +; MEM64-NEXT: i64.extend_i32_u +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store16_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %q = ptrtoint ptr %p to i32 %r = add nuw i32 %q, 24 %s = inttoptr i32 %r to ptr @@ -423,6 +674,14 @@ ; CHECK-NEXT: local.get 0 ; 
CHECK-NEXT: v128.store16_lane 12, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i16_with_folded_gep_offset: +; MEM64: .functype store_lane_i16_with_folded_gep_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store16_lane 12, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr inbounds i16, ptr %p, i32 6 %x = extractelement <8 x i16> %v, i32 0 store i16 %x, ptr %s @@ -439,6 +698,16 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store16_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i16_with_unfolded_gep_negative_offset: +; MEM64: .functype store_lane_i16_with_unfolded_gep_negative_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: i64.const -12 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store16_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr inbounds i16, ptr %p, i32 -6 %x = extractelement <8 x i16> %v, i32 0 store i16 %x, ptr %s @@ -455,6 +724,18 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store16_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i16_with_unfolded_offset: +; MEM64: .functype store_lane_i16_with_unfolded_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: i32.wrap_i64 +; MEM64-NEXT: i32.const 24 +; MEM64-NEXT: i32.add +; MEM64-NEXT: i64.extend_i32_u +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store16_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %q = ptrtoint ptr %p to i32 %r = add nsw i32 %q, 24 %s = inttoptr i32 %r to ptr @@ -473,6 +754,16 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store16_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i16_with_unfolded_gep_offset: +; MEM64: .functype store_lane_i16_with_unfolded_gep_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: i64.const 12 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store16_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr i16, ptr %p, i32 6 %x = extractelement <8 x i16> %v, i32 0 store i16 %x, ptr %s @@ -487,6 +778,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store16_lane 42, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i16_to_numeric_address: +; MEM64: .functype store_lane_i16_to_numeric_address (v128) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: i64.const 0 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store16_lane 42, 0 +; MEM64-NEXT: # fallthrough-return %s = inttoptr i32 42 to ptr %x = extractelement <8 x i16> %v, i32 0 store i16 %x, ptr %s @@ -501,6 +800,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store16_lane gv_i16, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i16_from_global_address: +; MEM64: .functype store_lane_i16_from_global_address (v128) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: i64.const 0 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store16_lane gv_i16, 0 +; MEM64-NEXT: # fallthrough-return %x = extractelement <8 x i16> %v, i32 0 store i16 %x, ptr @gv_i16 ret void @@ -518,6 +825,14 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load32_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i32_no_offset: +; MEM64: .functype load_lane_i32_no_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load32_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %x = 
load i32, ptr %p %t = insertelement <4 x i32> %v, i32 %x, i32 0 ret <4 x i32> %t @@ -533,6 +848,18 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load32_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i32_with_folded_offset: +; MEM64: .functype load_lane_i32_with_folded_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i32.wrap_i64 +; MEM64-NEXT: i32.const 24 +; MEM64-NEXT: i32.add +; MEM64-NEXT: i64.extend_i32_u +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load32_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %q = ptrtoint ptr %p to i32 %r = add nuw i32 %q, 24 %s = inttoptr i32 %r to ptr @@ -551,6 +878,16 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load32_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i32_with_folded_gep_offset: +; MEM64: .functype load_lane_i32_with_folded_gep_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i64.const 24 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load32_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr inbounds i32, ptr %p, i32 6 %x = load i32, ptr %s %t = insertelement <4 x i32> %v, i32 %x, i32 0 @@ -567,6 +904,16 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load32_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i32_with_unfolded_gep_negative_offset: +; MEM64: .functype load_lane_i32_with_unfolded_gep_negative_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i64.const -24 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load32_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr inbounds i32, ptr %p, i32 -6 %x = load i32, ptr %s %t = insertelement <4 x i32> %v, i32 %x, i32 0 @@ -583,6 +930,18 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load32_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i32_with_unfolded_offset: +; MEM64: .functype load_lane_i32_with_unfolded_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i32.wrap_i64 +; MEM64-NEXT: i32.const 24 +; MEM64-NEXT: i32.add +; MEM64-NEXT: i64.extend_i32_u +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load32_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %q = ptrtoint ptr %p to i32 %r = add nsw i32 %q, 24 %s = inttoptr i32 %r to ptr @@ -601,6 +960,16 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load32_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i32_with_unfolded_gep_offset: +; MEM64: .functype load_lane_i32_with_unfolded_gep_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i64.const 24 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load32_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr i32, ptr %p, i32 6 %x = load i32, ptr %s %t = insertelement <4 x i32> %v, i32 %x, i32 0 @@ -615,6 +984,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.load32_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i32_from_numeric_address: +; MEM64: .functype load_lane_i32_from_numeric_address (v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: i64.const 42 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.load32_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = inttoptr i32 42 to ptr %x = load i32, ptr %s %t = insertelement <4 x i32> %v, i32 %x, i32 0 @@ -630,6 +1007,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: 
v128.load32_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i32_from_global_address: +; MEM64: .functype load_lane_i32_from_global_address (v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: i64.const gv_i32 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.load32_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %x = load i32, ptr @gv_i32 %t = insertelement <4 x i32> %v, i32 %x, i32 0 ret <4 x i32> %t @@ -643,6 +1028,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store32_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i32_no_offset: +; MEM64: .functype store_lane_i32_no_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store32_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %x = extractelement <4 x i32> %v, i32 0 store i32 %x, ptr %p ret void @@ -656,6 +1049,18 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store32_lane 24, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i32_with_folded_offset: +; MEM64: .functype store_lane_i32_with_folded_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: i32.wrap_i64 +; MEM64-NEXT: i32.const 24 +; MEM64-NEXT: i32.add +; MEM64-NEXT: i64.extend_i32_u +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store32_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %q = ptrtoint ptr %p to i32 %r = add nuw i32 %q, 24 %s = inttoptr i32 %r to ptr @@ -672,6 +1077,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store32_lane 24, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i32_with_folded_gep_offset: +; MEM64: .functype store_lane_i32_with_folded_gep_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store32_lane 24, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr inbounds i32, ptr %p, i32 6 %x = extractelement <4 x i32> %v, i32 0 store i32 %x, ptr %s @@ -688,6 +1101,16 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store32_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i32_with_unfolded_gep_negative_offset: +; MEM64: .functype store_lane_i32_with_unfolded_gep_negative_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: i64.const -24 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store32_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr inbounds i32, ptr %p, i32 -6 %x = extractelement <4 x i32> %v, i32 0 store i32 %x, ptr %s @@ -704,6 +1127,18 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store32_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i32_with_unfolded_offset: +; MEM64: .functype store_lane_i32_with_unfolded_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: i32.wrap_i64 +; MEM64-NEXT: i32.const 24 +; MEM64-NEXT: i32.add +; MEM64-NEXT: i64.extend_i32_u +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store32_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %q = ptrtoint ptr %p to i32 %r = add nsw i32 %q, 24 %s = inttoptr i32 %r to ptr @@ -722,6 +1157,16 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store32_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i32_with_unfolded_gep_offset: +; MEM64: .functype store_lane_i32_with_unfolded_gep_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: i64.const 24 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 0 
+; MEM64-NEXT: v128.store32_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr i32, ptr %p, i32 6 %x = extractelement <4 x i32> %v, i32 0 store i32 %x, ptr %s @@ -736,6 +1181,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store32_lane 42, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i32_to_numeric_address: +; MEM64: .functype store_lane_i32_to_numeric_address (v128) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: i64.const 0 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store32_lane 42, 0 +; MEM64-NEXT: # fallthrough-return %s = inttoptr i32 42 to ptr %x = extractelement <4 x i32> %v, i32 0 store i32 %x, ptr %s @@ -750,6 +1203,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store32_lane gv_i32, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i32_from_global_address: +; MEM64: .functype store_lane_i32_from_global_address (v128) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: i64.const 0 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store32_lane gv_i32, 0 +; MEM64-NEXT: # fallthrough-return %x = extractelement <4 x i32> %v, i32 0 store i32 %x, ptr @gv_i32 ret void @@ -767,6 +1228,14 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load64_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i64_no_offset: +; MEM64: .functype load_lane_i64_no_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load64_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %x = load i64, ptr %p %t = insertelement <2 x i64> %v, i64 %x, i32 0 ret <2 x i64> %t @@ -782,6 +1251,18 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load64_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i64_with_folded_offset: +; MEM64: .functype load_lane_i64_with_folded_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i32.wrap_i64 +; MEM64-NEXT: i32.const 24 +; MEM64-NEXT: i32.add +; MEM64-NEXT: i64.extend_i32_u +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load64_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %q = ptrtoint ptr %p to i32 %r = add nuw i32 %q, 24 %s = inttoptr i32 %r to ptr @@ -800,6 +1281,16 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load64_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i64_with_folded_gep_offset: +; MEM64: .functype load_lane_i64_with_folded_gep_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i64.const 48 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load64_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr inbounds i64, ptr %p, i32 6 %x = load i64, ptr %s %t = insertelement <2 x i64> %v, i64 %x, i32 0 @@ -816,6 +1307,16 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load64_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i64_with_unfolded_gep_negative_offset: +; MEM64: .functype load_lane_i64_with_unfolded_gep_negative_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i64.const -48 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load64_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr inbounds i64, ptr %p, i32 -6 %x = load i64, ptr %s %t = insertelement <2 x i64> %v, i64 %x, i32 0 @@ -832,6 +1333,18 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load64_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i64_with_unfolded_offset: +; MEM64: 
.functype load_lane_i64_with_unfolded_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i32.wrap_i64 +; MEM64-NEXT: i32.const 24 +; MEM64-NEXT: i32.add +; MEM64-NEXT: i64.extend_i32_u +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load64_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %q = ptrtoint ptr %p to i32 %r = add nsw i32 %q, 24 %s = inttoptr i32 %r to ptr @@ -850,6 +1363,16 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: v128.load64_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i64_with_unfolded_gep_offset: +; MEM64: .functype load_lane_i64_with_unfolded_gep_offset (i64, v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: i64.const 48 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: v128.load64_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr i64, ptr %p, i32 6 %x = load i64, ptr %s %t = insertelement <2 x i64> %v, i64 %x, i32 0 @@ -864,6 +1387,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.load64_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i64_from_numeric_address: +; MEM64: .functype load_lane_i64_from_numeric_address (v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: i64.const 42 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.load64_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = inttoptr i32 42 to ptr %x = load i64, ptr %s %t = insertelement <2 x i64> %v, i64 %x, i32 0 @@ -879,6 +1410,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.load64_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: load_lane_i64_from_global_address: +; MEM64: .functype load_lane_i64_from_global_address (v128) -> (v128) +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: i64.const gv_i64 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.load64_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %x = load i64, ptr @gv_i64 %t = insertelement <2 x i64> %v, i64 %x, i32 0 ret <2 x i64> %t @@ -892,6 +1431,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store64_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i64_no_offset: +; MEM64: .functype store_lane_i64_no_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store64_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %x = extractelement <2 x i64> %v, i32 0 store i64 %x, ptr %p ret void @@ -905,6 +1452,18 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store64_lane 24, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i64_with_folded_offset: +; MEM64: .functype store_lane_i64_with_folded_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: i32.wrap_i64 +; MEM64-NEXT: i32.const 24 +; MEM64-NEXT: i32.add +; MEM64-NEXT: i64.extend_i32_u +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store64_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %q = ptrtoint ptr %p to i32 %r = add nuw i32 %q, 24 %s = inttoptr i32 %r to ptr @@ -921,6 +1480,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store64_lane 48, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i64_with_folded_gep_offset: +; MEM64: .functype store_lane_i64_with_folded_gep_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store64_lane 48, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr inbounds i64, ptr %p, i32 6 %x = extractelement <2 x i64> %v, i32 0 store i64 %x, ptr %s @@ -937,6 +1504,16 @@ ; CHECK-NEXT: 
local.get 0 ; CHECK-NEXT: v128.store64_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i64_with_unfolded_gep_negative_offset: +; MEM64: .functype store_lane_i64_with_unfolded_gep_negative_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: i64.const -48 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store64_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr inbounds i64, ptr %p, i32 -6 %x = extractelement <2 x i64> %v, i32 0 store i64 %x, ptr %s @@ -953,6 +1530,18 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store64_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i64_with_unfolded_offset: +; MEM64: .functype store_lane_i64_with_unfolded_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: i32.wrap_i64 +; MEM64-NEXT: i32.const 24 +; MEM64-NEXT: i32.add +; MEM64-NEXT: i64.extend_i32_u +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store64_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %q = ptrtoint ptr %p to i32 %r = add nsw i32 %q, 24 %s = inttoptr i32 %r to ptr @@ -971,6 +1560,16 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store64_lane 0, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i64_with_unfolded_gep_offset: +; MEM64: .functype store_lane_i64_with_unfolded_gep_offset (v128, i64) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: local.get 1 +; MEM64-NEXT: i64.const 48 +; MEM64-NEXT: i64.add +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store64_lane 0, 0 +; MEM64-NEXT: # fallthrough-return %s = getelementptr i64, ptr %p, i32 6 %x = extractelement <2 x i64> %v, i32 0 store i64 %x, ptr %s @@ -985,6 +1584,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store64_lane 42, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i64_to_numeric_address: +; MEM64: .functype store_lane_i64_to_numeric_address (v128) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: i64.const 0 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store64_lane 42, 0 +; MEM64-NEXT: # fallthrough-return %s = inttoptr i32 42 to ptr %x = extractelement <2 x i64> %v, i32 0 store i64 %x, ptr %s @@ -999,6 +1606,14 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.store64_lane gv_i64, 0 ; CHECK-NEXT: # fallthrough-return +; +; MEM64-LABEL: store_lane_i64_from_global_address: +; MEM64: .functype store_lane_i64_from_global_address (v128) -> () +; MEM64-NEXT: # %bb.0: +; MEM64-NEXT: i64.const 0 +; MEM64-NEXT: local.get 0 +; MEM64-NEXT: v128.store64_lane gv_i64, 0 +; MEM64-NEXT: # fallthrough-return %x = extractelement <2 x i64> %v, i32 0 store i64 %x, ptr @gv_i64 ret void