diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2591,16 +2591,16 @@ [llvm_nxv16i1_ty, llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty], []>; class SME_Load_Store_H_Intrinsic : DefaultAttrsIntrinsic<[], - [llvm_nxv16i1_ty, LLVMPointerType, llvm_i64_ty, llvm_i32_ty], []>; + [llvm_nxv16i1_ty, llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty], []>; class SME_Load_Store_S_Intrinsic : DefaultAttrsIntrinsic<[], - [llvm_nxv16i1_ty, LLVMPointerType, llvm_i64_ty, llvm_i32_ty], []>; + [llvm_nxv16i1_ty, llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty], []>; class SME_Load_Store_D_Intrinsic : DefaultAttrsIntrinsic<[], - [llvm_nxv16i1_ty, LLVMPointerType, llvm_i64_ty, llvm_i32_ty], []>; + [llvm_nxv16i1_ty, llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty], []>; class SME_Load_Store_Q_Intrinsic : DefaultAttrsIntrinsic<[], - [llvm_nxv16i1_ty, LLVMPointerType, llvm_i64_ty, llvm_i32_ty], []>; + [llvm_nxv16i1_ty, llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty], []>; // Loads def int_aarch64_sme_ld1b_horiz : SME_Load_Store_B_Intrinsic; diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll --- a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s -define void @ld1b( %pg, i8* %ptr, i32 %sliceidx) { +define void @ld1b( %pg, ptr %ptr, i32 %sliceidx) { ; CHECK-LABEL: ld1b: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w1 @@ -10,12 +10,12 @@ ; CHECK-NEXT: ld1b {za0v.b[w13, 0]}, p0/z, [x0] ; CHECK-NEXT: ret %tileslice = add i32 %sliceidx, 15 - call void @llvm.aarch64.sme.ld1b.horiz( %pg, i8* %ptr, i64 0, i32 %tileslice) - call void @llvm.aarch64.sme.ld1b.vert( %pg, i8* %ptr, i64 0, i32 0) + call void @llvm.aarch64.sme.ld1b.horiz( %pg, ptr %ptr, i64 0, i32 %tileslice) + call void @llvm.aarch64.sme.ld1b.vert( %pg, ptr %ptr, i64 0, i32 0) ret void; } -define void @ld1b_with_addr_offset( %pg, i8* %ptr, i64 %index, i32 %sliceidx) { +define void @ld1b_with_addr_offset( %pg, ptr %ptr, i64 %index, i32 %sliceidx) { ; CHECK-LABEL: ld1b_with_addr_offset: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, wzr @@ -23,14 +23,14 @@ ; CHECK-NEXT: ld1b {za0h.b[w12, 0]}, p0/z, [x0, x1] ; CHECK-NEXT: ld1b {za0v.b[w13, 15]}, p0/z, [x0, x1] ; CHECK-NEXT: ret - %base = getelementptr i8, i8* %ptr, i64 %index + %base = getelementptr i8, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 15 - call void @llvm.aarch64.sme.ld1b.horiz( %pg, i8* %base, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1b.vert( %pg, i8* %base, i64 0, i32 %tileslice) + call void @llvm.aarch64.sme.ld1b.horiz( %pg, ptr %base, i64 0, i32 0) + call void @llvm.aarch64.sme.ld1b.vert( %pg, ptr %base, i64 0, i32 %tileslice) ret void; } -define void @ld1h( %pg, i16* %ptr, i32 %sliceidx) { +define void @ld1h( %pg, ptr %ptr, i32 %sliceidx) { ; CHECK-LABEL: ld1h: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w1 @@ -41,14 +41,14 @@ ; CHECK-NEXT: ld1h {za1v.h[w12, 7]}, p0/z, [x0] ; CHECK-NEXT: ret %tileslice = add i32 %sliceidx, 7 - call void @llvm.aarch64.sme.ld1h.horiz( %pg, i16* %ptr, i64 0, i32 %tileslice) - call void @llvm.aarch64.sme.ld1h.horiz( %pg, i16* %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.ld1h.vert( %pg, i16* %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1h.vert( %pg, i16* %ptr, i64 1, i32 %tileslice) + call void @llvm.aarch64.sme.ld1h.horiz( %pg, ptr %ptr, i64 0, i32 %tileslice) + call void @llvm.aarch64.sme.ld1h.horiz( %pg, ptr %ptr, i64 1, i32 0) + call void @llvm.aarch64.sme.ld1h.vert( %pg, ptr %ptr, i64 0, i32 0) + call void @llvm.aarch64.sme.ld1h.vert( %pg, ptr %ptr, i64 1, i32 %tileslice) ret void; } -define void @ld1h_with_addr_offset( %pg, i16* %ptr, i64 %index, i32 %sliceidx) { +define void @ld1h_with_addr_offset( %pg, ptr %ptr, i64 %index, i32 %sliceidx) { ; CHECK-LABEL: ld1h_with_addr_offset: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w2 @@ -56,14 +56,14 @@ ; CHECK-NEXT: ld1h {za0h.h[w12, 7]}, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ld1h {za1v.h[w13, 0]}, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ret - %base = getelementptr i16, i16* %ptr, i64 %index + %base = getelementptr i16, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 7 - call void @llvm.aarch64.sme.ld1h.horiz( %pg, i16* %base, i64 0, i32 %tileslice) - call void @llvm.aarch64.sme.ld1h.vert( %pg, i16* %base, i64 1, i32 0) + call void @llvm.aarch64.sme.ld1h.horiz( %pg, ptr %base, i64 0, i32 %tileslice) + call void @llvm.aarch64.sme.ld1h.vert( %pg, ptr %base, i64 1, i32 0) ret void; } -define void @ld1w( %pg, i32* %ptr, i32 %sliceidx) { +define void @ld1w( %pg, ptr %ptr, i32 %sliceidx) { ; CHECK-LABEL: ld1w: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, wzr @@ -78,18 +78,18 @@ ; CHECK-NEXT: ld1w {za3v.s[w12, 0]}, p0/z, [x0] ; CHECK-NEXT: ret %tileslice = add i32 %sliceidx, 3 - call void @llvm.aarch64.sme.ld1w.horiz( %pg, i32* %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1w.horiz( %pg, i32* %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.ld1w.horiz( %pg, i32* %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.ld1w.horiz( %pg, i32* %ptr, i64 3, i32 %tileslice) - call void @llvm.aarch64.sme.ld1w.vert( %pg, i32* %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1w.vert( %pg, i32* %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.ld1w.vert( %pg, i32* %ptr, i64 2, i32 %tileslice) - call void @llvm.aarch64.sme.ld1w.vert( %pg, i32* %ptr, i64 3, i32 0) + call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %ptr, i64 0, i32 0) + call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %ptr, i64 1, i32 0) + call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %ptr, i64 2, i32 0) + call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %ptr, i64 3, i32 %tileslice) + call void @llvm.aarch64.sme.ld1w.vert( %pg, ptr %ptr, i64 0, i32 0) + call void @llvm.aarch64.sme.ld1w.vert( %pg, ptr %ptr, i64 1, i32 0) + call void @llvm.aarch64.sme.ld1w.vert( %pg, ptr %ptr, i64 2, i32 %tileslice) + call void @llvm.aarch64.sme.ld1w.vert( %pg, ptr %ptr, i64 3, i32 0) ret void; } -define void @ld1w_with_addr_offset( %pg, i32* %ptr, i64 %index, i32 %sliceidx) { +define void @ld1w_with_addr_offset( %pg, ptr %ptr, i64 %index, i32 %sliceidx) { ; CHECK-LABEL: ld1w_with_addr_offset: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w2 @@ -97,14 +97,14 @@ ; CHECK-NEXT: ld1w {za0h.s[w13, 0]}, p0/z, [x0, x1, lsl #2] ; CHECK-NEXT: ld1w {za3v.s[w12, 3]}, p0/z, [x0, x1, lsl #2] ; CHECK-NEXT: ret - %base = getelementptr i32, i32* %ptr, i64 %index + %base = getelementptr i32, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 3 - call void @llvm.aarch64.sme.ld1w.horiz( %pg, i32* %base, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1w.vert( %pg, i32* %base, i64 3, i32 %tileslice) + call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %base, i64 0, i32 0) + call void @llvm.aarch64.sme.ld1w.vert( %pg, ptr %base, i64 3, i32 %tileslice) ret void; } -define void @ld1d( %pg, i64* %ptr, i32 %sliceidx) { +define void @ld1d( %pg, ptr %ptr, i32 %sliceidx) { ; CHECK-LABEL: ld1d: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w13, wzr @@ -127,26 +127,26 @@ ; CHECK-NEXT: ld1d {za7v.d[w12, 1]}, p0/z, [x0] ; CHECK-NEXT: ret %tileslice = add i32 %sliceidx, 1 - call void @llvm.aarch64.sme.ld1d.horiz( %pg, i64* %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1d.horiz( %pg, i64* %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.ld1d.horiz( %pg, i64* %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.ld1d.horiz( %pg, i64* %ptr, i64 3, i32 0) - call void @llvm.aarch64.sme.ld1d.horiz( %pg, i64* %ptr, i64 4, i32 %tileslice) - call void @llvm.aarch64.sme.ld1d.horiz( %pg, i64* %ptr, i64 5, i32 0) - call void @llvm.aarch64.sme.ld1d.horiz( %pg, i64* %ptr, i64 6, i32 0) - call void @llvm.aarch64.sme.ld1d.horiz( %pg, i64* %ptr, i64 7, i32 0) - call void @llvm.aarch64.sme.ld1d.vert( %pg, i64* %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1d.vert( %pg, i64* %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.ld1d.vert( %pg, i64* %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.ld1d.vert( %pg, i64* %ptr, i64 3, i32 0) - call void @llvm.aarch64.sme.ld1d.vert( %pg, i64* %ptr, i64 4, i32 0) - call void @llvm.aarch64.sme.ld1d.vert( %pg, i64* %ptr, i64 5, i32 0) - call void @llvm.aarch64.sme.ld1d.vert( %pg, i64* %ptr, i64 6, i32 0) - call void @llvm.aarch64.sme.ld1d.vert( %pg, i64* %ptr, i64 7, i32 %tileslice) + call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i64 0, i32 0) + call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i64 1, i32 0) + call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i64 2, i32 0) + call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i64 3, i32 0) + call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i64 4, i32 %tileslice) + call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i64 5, i32 0) + call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i64 6, i32 0) + call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i64 7, i32 0) + call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i64 0, i32 0) + call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i64 1, i32 0) + call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i64 2, i32 0) + call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i64 3, i32 0) + call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i64 4, i32 0) + call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i64 5, i32 0) + call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i64 6, i32 0) + call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i64 7, i32 %tileslice) ret void; } -define void @ld1d_with_addr_offset( %pg, i64* %ptr, i64 %index, i32 %sliceidx) { +define void @ld1d_with_addr_offset( %pg, ptr %ptr, i64 %index, i32 %sliceidx) { ; CHECK-LABEL: ld1d_with_addr_offset: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w2 @@ -154,14 +154,14 @@ ; CHECK-NEXT: ld1d {za0h.d[w12, 1]}, p0/z, [x0, x1, lsl #3] ; CHECK-NEXT: ld1d {za7v.d[w13, 0]}, p0/z, [x0, x1, lsl #3] ; CHECK-NEXT: ret - %base = getelementptr i64, i64* %ptr, i64 %index + %base = getelementptr i64, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 1 - call void @llvm.aarch64.sme.ld1d.horiz( %pg, i64* %base, i64 0, i32 %tileslice) - call void @llvm.aarch64.sme.ld1d.vert( %pg, i64* %base, i64 7, i32 0) + call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %base, i64 0, i32 %tileslice) + call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %base, i64 7, i32 0) ret void; } -define void @ld1q( %pg, i128* %ptr) { +define void @ld1q( %pg, ptr %ptr) { ; CHECK-LABEL: ld1q: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, wzr @@ -198,77 +198,77 @@ ; CHECK-NEXT: ld1q {za14v.q[w12, 0]}, p0/z, [x0] ; CHECK-NEXT: ld1q {za15v.q[w12, 0]}, p0/z, [x0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.ld1q.horiz( %pg, i128* %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, i128* %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, i128* %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, i128* %ptr, i64 3, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, i128* %ptr, i64 4, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, i128* %ptr, i64 5, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, i128* %ptr, i64 6, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, i128* %ptr, i64 7, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, i128* %ptr, i64 8, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, i128* %ptr, i64 9, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, i128* %ptr, i64 10, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, i128* %ptr, i64 11, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, i128* %ptr, i64 12, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, i128* %ptr, i64 13, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, i128* %ptr, i64 14, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, i128* %ptr, i64 15, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, i128* %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, i128* %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, i128* %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, i128* %ptr, i64 3, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, i128* %ptr, i64 4, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, i128* %ptr, i64 5, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, i128* %ptr, i64 6, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, i128* %ptr, i64 7, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, i128* %ptr, i64 8, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, i128* %ptr, i64 9, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, i128* %ptr, i64 10, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, i128* %ptr, i64 11, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, i128* %ptr, i64 12, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, i128* %ptr, i64 13, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, i128* %ptr, i64 14, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, i128* %ptr, i64 15, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 0, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 1, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 2, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 3, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 4, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 5, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 6, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 7, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 8, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 9, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 10, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 11, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 12, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 13, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 14, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 15, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 0, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 1, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 2, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 3, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 4, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 5, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 6, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 7, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 8, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 9, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 10, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 11, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 12, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 13, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 14, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 15, i32 0) ret void; } -define void @ld1q_with_addr_offset( %pg, i128* %ptr, i64 %index) { +define void @ld1q_with_addr_offset( %pg, ptr %ptr, i64 %index) { ; CHECK-LABEL: ld1q_with_addr_offset: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: ld1q {za0h.q[w12, 0]}, p0/z, [x0, x1, lsl #4] ; CHECK-NEXT: ld1q {za15v.q[w12, 0]}, p0/z, [x0, x1, lsl #4] ; CHECK-NEXT: ret - %base = getelementptr i128, i128* %ptr, i64 %index - call void @llvm.aarch64.sme.ld1q.horiz( %pg, i128* %base, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, i128* %base, i64 15, i32 0) + %base = getelementptr i128, ptr %ptr, i64 %index + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %base, i64 0, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %base, i64 15, i32 0) ret void; } -define void @ldr(i8* %ptr) { +define void @ldr(ptr %ptr) { ; CHECK-LABEL: ldr: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: ldr za[w12, 0], [x0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.ldr(i32 0, i8* %ptr) + call void @llvm.aarch64.sme.ldr(i32 0, ptr %ptr) ret void; } -define void @ldr_with_off_15(i8* %ptr) { +define void @ldr_with_off_15(ptr %ptr) { ; CHECK-LABEL: ldr_with_off_15: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: add x8, x0, #15 ; CHECK-NEXT: ldr za[w12, 0], [x8] ; CHECK-NEXT: ret - %base = getelementptr i8, i8* %ptr, i64 15 - call void @llvm.aarch64.sme.ldr(i32 0, i8* %base) + %base = getelementptr i8, ptr %ptr, i64 15 + call void @llvm.aarch64.sme.ldr(i32 0, ptr %base) ret void; } -define void @ldr_with_off_15mulvl(i8* %ptr) { +define void @ldr_with_off_15mulvl(ptr %ptr) { ; CHECK-LABEL: ldr_with_off_15mulvl: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, wzr @@ -276,12 +276,12 @@ ; CHECK-NEXT: ret %vscale = call i64 @llvm.vscale.i64() %mulvl = mul i64 %vscale, 240 - %base = getelementptr i8, i8* %ptr, i64 %mulvl - call void @llvm.aarch64.sme.ldr(i32 0, i8* %base) + %base = getelementptr i8, ptr %ptr, i64 %mulvl + call void @llvm.aarch64.sme.ldr(i32 0, ptr %base) ret void; } -define void @ldr_with_off_16mulvl(i8* %ptr) { +define void @ldr_with_off_16mulvl(ptr %ptr) { ; CHECK-LABEL: ldr_with_off_16mulvl: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, wzr @@ -290,21 +290,21 @@ ; CHECK-NEXT: ret %vscale = call i64 @llvm.vscale.i64() %mulvl = mul i64 %vscale, 256 - %base = getelementptr i8, i8* %ptr, i64 %mulvl - call void @llvm.aarch64.sme.ldr(i32 0, i8* %base) + %base = getelementptr i8, ptr %ptr, i64 %mulvl + call void @llvm.aarch64.sme.ldr(i32 0, ptr %base) ret void; } -declare void @llvm.aarch64.sme.ld1b.horiz(, i8*, i64, i32) -declare void @llvm.aarch64.sme.ld1h.horiz(, i16*, i64, i32) -declare void @llvm.aarch64.sme.ld1w.horiz(, i32*, i64, i32) -declare void @llvm.aarch64.sme.ld1d.horiz(, i64*, i64, i32) -declare void @llvm.aarch64.sme.ld1q.horiz(, i128*, i64, i32) -declare void @llvm.aarch64.sme.ld1b.vert(, i8*, i64, i32) -declare void @llvm.aarch64.sme.ld1h.vert(, i16*, i64, i32) -declare void @llvm.aarch64.sme.ld1w.vert(, i32*, i64, i32) -declare void @llvm.aarch64.sme.ld1d.vert(, i64*, i64, i32) -declare void @llvm.aarch64.sme.ld1q.vert(, i128*, i64, i32) +declare void @llvm.aarch64.sme.ld1b.horiz(, ptr, i64, i32) +declare void @llvm.aarch64.sme.ld1h.horiz(, ptr, i64, i32) +declare void @llvm.aarch64.sme.ld1w.horiz(, ptr, i64, i32) +declare void @llvm.aarch64.sme.ld1d.horiz(, ptr, i64, i32) +declare void @llvm.aarch64.sme.ld1q.horiz(, ptr, i64, i32) +declare void @llvm.aarch64.sme.ld1b.vert(, ptr, i64, i32) +declare void @llvm.aarch64.sme.ld1h.vert(, ptr, i64, i32) +declare void @llvm.aarch64.sme.ld1w.vert(, ptr, i64, i32) +declare void @llvm.aarch64.sme.ld1d.vert(, ptr, i64, i32) +declare void @llvm.aarch64.sme.ld1q.vert(, ptr, i64, i32) -declare void @llvm.aarch64.sme.ldr(i32, i8*) +declare void @llvm.aarch64.sme.ldr(i32, ptr) declare i64 @llvm.vscale.i64() diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll --- a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s -define void @st1b( %pg, i8* %ptr, i32 %sliceidx) { +define void @st1b( %pg, ptr %ptr, i32 %sliceidx) { ; CHECK-LABEL: st1b: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w1 @@ -10,12 +10,12 @@ ; CHECK-NEXT: st1b {za0v.b[w13, 0]}, p0, [x0] ; CHECK-NEXT: ret %tileslice = add i32 %sliceidx, 15 - call void @llvm.aarch64.sme.st1b.horiz( %pg, i8* %ptr, i64 0, i32 %tileslice) - call void @llvm.aarch64.sme.st1b.vert( %pg, i8* %ptr, i64 0, i32 0) + call void @llvm.aarch64.sme.st1b.horiz( %pg, ptr %ptr, i64 0, i32 %tileslice) + call void @llvm.aarch64.sme.st1b.vert( %pg, ptr %ptr, i64 0, i32 0) ret void; } -define void @st1b_with_addr_offset( %pg, i8* %ptr, i64 %index, i32 %sliceidx) { +define void @st1b_with_addr_offset( %pg, ptr %ptr, i64 %index, i32 %sliceidx) { ; CHECK-LABEL: st1b_with_addr_offset: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, wzr @@ -23,14 +23,14 @@ ; CHECK-NEXT: st1b {za0h.b[w12, 0]}, p0, [x0, x1] ; CHECK-NEXT: st1b {za0v.b[w13, 15]}, p0, [x0, x1] ; CHECK-NEXT: ret - %base = getelementptr i8, i8* %ptr, i64 %index + %base = getelementptr i8, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 15 - call void @llvm.aarch64.sme.st1b.horiz( %pg, i8* %base, i64 0, i32 0) - call void @llvm.aarch64.sme.st1b.vert( %pg, i8* %base, i64 0, i32 %tileslice) + call void @llvm.aarch64.sme.st1b.horiz( %pg, ptr %base, i64 0, i32 0) + call void @llvm.aarch64.sme.st1b.vert( %pg, ptr %base, i64 0, i32 %tileslice) ret void; } -define void @st1h( %pg, i16* %ptr, i32 %sliceidx) { +define void @st1h( %pg, ptr %ptr, i32 %sliceidx) { ; CHECK-LABEL: st1h: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w1 @@ -41,14 +41,14 @@ ; CHECK-NEXT: st1h {za1v.h[w12, 7]}, p0, [x0] ; CHECK-NEXT: ret %tileslice = add i32 %sliceidx, 7 - call void @llvm.aarch64.sme.st1h.horiz( %pg, i16* %ptr, i64 0, i32 %tileslice) - call void @llvm.aarch64.sme.st1h.horiz( %pg, i16* %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.st1h.vert( %pg, i16* %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.st1h.vert( %pg, i16* %ptr, i64 1, i32 %tileslice) + call void @llvm.aarch64.sme.st1h.horiz( %pg, ptr %ptr, i64 0, i32 %tileslice) + call void @llvm.aarch64.sme.st1h.horiz( %pg, ptr %ptr, i64 1, i32 0) + call void @llvm.aarch64.sme.st1h.vert( %pg, ptr %ptr, i64 0, i32 0) + call void @llvm.aarch64.sme.st1h.vert( %pg, ptr %ptr, i64 1, i32 %tileslice) ret void; } -define void @st1h_with_addr_offset( %pg, i16* %ptr, i64 %index, i32 %sliceidx) { +define void @st1h_with_addr_offset( %pg, ptr %ptr, i64 %index, i32 %sliceidx) { ; CHECK-LABEL: st1h_with_addr_offset: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w2 @@ -56,14 +56,14 @@ ; CHECK-NEXT: st1h {za0h.h[w12, 7]}, p0, [x0, x1, lsl #1] ; CHECK-NEXT: st1h {za1v.h[w13, 0]}, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret - %base = getelementptr i16, i16* %ptr, i64 %index + %base = getelementptr i16, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 7 - call void @llvm.aarch64.sme.st1h.horiz( %pg, i16* %base, i64 0, i32 %tileslice) - call void @llvm.aarch64.sme.st1h.vert( %pg, i16* %base, i64 1, i32 0) + call void @llvm.aarch64.sme.st1h.horiz( %pg, ptr %base, i64 0, i32 %tileslice) + call void @llvm.aarch64.sme.st1h.vert( %pg, ptr %base, i64 1, i32 0) ret void; } -define void @st1w( %pg, i32* %ptr, i32 %sliceidx) { +define void @st1w( %pg, ptr %ptr, i32 %sliceidx) { ; CHECK-LABEL: st1w: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w13, wzr @@ -78,18 +78,18 @@ ; CHECK-NEXT: st1w {za3v.s[w13, 0]}, p0, [x0] ; CHECK-NEXT: ret %tileslice = add i32 %sliceidx, 3 - call void @llvm.aarch64.sme.st1w.horiz( %pg, i32* %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.st1w.horiz( %pg, i32* %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.st1w.horiz( %pg, i32* %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.st1w.horiz( %pg, i32* %ptr, i64 3, i32 %tileslice) - call void @llvm.aarch64.sme.st1w.vert( %pg, i32* %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.st1w.vert( %pg, i32* %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.st1w.vert( %pg, i32* %ptr, i64 2, i32 %tileslice) - call void @llvm.aarch64.sme.st1w.vert( %pg, i32* %ptr, i64 3, i32 0) + call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %ptr, i64 0, i32 0) + call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %ptr, i64 1, i32 0) + call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %ptr, i64 2, i32 0) + call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %ptr, i64 3, i32 %tileslice) + call void @llvm.aarch64.sme.st1w.vert( %pg, ptr %ptr, i64 0, i32 0) + call void @llvm.aarch64.sme.st1w.vert( %pg, ptr %ptr, i64 1, i32 0) + call void @llvm.aarch64.sme.st1w.vert( %pg, ptr %ptr, i64 2, i32 %tileslice) + call void @llvm.aarch64.sme.st1w.vert( %pg, ptr %ptr, i64 3, i32 0) ret void; } -define void @st1w_with_addr_offset( %pg, i32* %ptr, i64 %index, i32 %sliceidx) { +define void @st1w_with_addr_offset( %pg, ptr %ptr, i64 %index, i32 %sliceidx) { ; CHECK-LABEL: st1w_with_addr_offset: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, wzr @@ -97,14 +97,14 @@ ; CHECK-NEXT: st1w {za0h.s[w12, 0]}, p0, [x0, x1, lsl #2] ; CHECK-NEXT: st1w {za3v.s[w13, 3]}, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret - %base = getelementptr i32, i32* %ptr, i64 %index + %base = getelementptr i32, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 3 - call void @llvm.aarch64.sme.st1w.horiz( %pg, i32* %base, i64 0, i32 0) - call void @llvm.aarch64.sme.st1w.vert( %pg, i32* %base, i64 3, i32 %tileslice) + call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %base, i64 0, i32 0) + call void @llvm.aarch64.sme.st1w.vert( %pg, ptr %base, i64 3, i32 %tileslice) ret void; } -define void @st1d( %pg, i64* %ptr, i32 %sliceidx) { +define void @st1d( %pg, ptr %ptr, i32 %sliceidx) { ; CHECK-LABEL: st1d: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w13, wzr @@ -127,26 +127,26 @@ ; CHECK-NEXT: st1d {za7v.d[w12, 1]}, p0, [x0] ; CHECK-NEXT: ret %tileslice = add i32 %sliceidx, 1 - call void @llvm.aarch64.sme.st1d.horiz( %pg, i64* %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.st1d.horiz( %pg, i64* %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.st1d.horiz( %pg, i64* %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.st1d.horiz( %pg, i64* %ptr, i64 3, i32 0) - call void @llvm.aarch64.sme.st1d.horiz( %pg, i64* %ptr, i64 4, i32 %tileslice) - call void @llvm.aarch64.sme.st1d.horiz( %pg, i64* %ptr, i64 5, i32 0) - call void @llvm.aarch64.sme.st1d.horiz( %pg, i64* %ptr, i64 6, i32 0) - call void @llvm.aarch64.sme.st1d.horiz( %pg, i64* %ptr, i64 7, i32 0) - call void @llvm.aarch64.sme.st1d.vert( %pg, i64* %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.st1d.vert( %pg, i64* %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.st1d.vert( %pg, i64* %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.st1d.vert( %pg, i64* %ptr, i64 3, i32 0) - call void @llvm.aarch64.sme.st1d.vert( %pg, i64* %ptr, i64 4, i32 0) - call void @llvm.aarch64.sme.st1d.vert( %pg, i64* %ptr, i64 5, i32 0) - call void @llvm.aarch64.sme.st1d.vert( %pg, i64* %ptr, i64 6, i32 0) - call void @llvm.aarch64.sme.st1d.vert( %pg, i64* %ptr, i64 7, i32 %tileslice) + call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i64 0, i32 0) + call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i64 1, i32 0) + call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i64 2, i32 0) + call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i64 3, i32 0) + call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i64 4, i32 %tileslice) + call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i64 5, i32 0) + call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i64 6, i32 0) + call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i64 7, i32 0) + call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i64 0, i32 0) + call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i64 1, i32 0) + call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i64 2, i32 0) + call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i64 3, i32 0) + call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i64 4, i32 0) + call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i64 5, i32 0) + call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i64 6, i32 0) + call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i64 7, i32 %tileslice) ret void; } -define void @st1d_with_addr_offset( %pg, i64* %ptr, i64 %index, i32 %sliceidx) { +define void @st1d_with_addr_offset( %pg, ptr %ptr, i64 %index, i32 %sliceidx) { ; CHECK-LABEL: st1d_with_addr_offset: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w2 @@ -154,14 +154,14 @@ ; CHECK-NEXT: st1d {za0h.d[w12, 1]}, p0, [x0, x1, lsl #3] ; CHECK-NEXT: st1d {za7v.d[w13, 0]}, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret - %base = getelementptr i64, i64* %ptr, i64 %index + %base = getelementptr i64, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 1 - call void @llvm.aarch64.sme.st1d.horiz( %pg, i64* %base, i64 0, i32 %tileslice) - call void @llvm.aarch64.sme.st1d.vert( %pg, i64* %base, i64 7, i32 0) + call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %base, i64 0, i32 %tileslice) + call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %base, i64 7, i32 0) ret void; } -define void @st1q( %pg, i128* %ptr) { +define void @st1q( %pg, ptr %ptr) { ; CHECK-LABEL: st1q: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, wzr @@ -198,77 +198,77 @@ ; CHECK-NEXT: st1q {za14v.q[w12, 0]}, p0, [x0] ; CHECK-NEXT: st1q {za15v.q[w12, 0]}, p0, [x0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.st1q.horiz( %pg, i128* %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, i128* %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, i128* %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, i128* %ptr, i64 3, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, i128* %ptr, i64 4, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, i128* %ptr, i64 5, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, i128* %ptr, i64 6, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, i128* %ptr, i64 7, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, i128* %ptr, i64 8, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, i128* %ptr, i64 9, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, i128* %ptr, i64 10, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, i128* %ptr, i64 11, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, i128* %ptr, i64 12, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, i128* %ptr, i64 13, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, i128* %ptr, i64 14, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, i128* %ptr, i64 15, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, i128* %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, i128* %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, i128* %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, i128* %ptr, i64 3, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, i128* %ptr, i64 4, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, i128* %ptr, i64 5, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, i128* %ptr, i64 6, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, i128* %ptr, i64 7, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, i128* %ptr, i64 8, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, i128* %ptr, i64 9, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, i128* %ptr, i64 10, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, i128* %ptr, i64 11, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, i128* %ptr, i64 12, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, i128* %ptr, i64 13, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, i128* %ptr, i64 14, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, i128* %ptr, i64 15, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 0, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 1, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 2, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 3, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 4, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 5, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 6, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 7, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 8, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 9, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 10, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 11, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 12, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 13, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 14, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 15, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 0, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 1, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 2, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 3, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 4, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 5, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 6, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 7, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 8, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 9, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 10, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 11, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 12, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 13, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 14, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 15, i32 0) ret void; } -define void @st1q_with_addr_offset( %pg, i128* %ptr, i64 %index) { +define void @st1q_with_addr_offset( %pg, ptr %ptr, i64 %index) { ; CHECK-LABEL: st1q_with_addr_offset: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: st1q {za0h.q[w12, 0]}, p0, [x0, x1, lsl #4] ; CHECK-NEXT: st1q {za15v.q[w12, 0]}, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret - %base = getelementptr i128, i128* %ptr, i64 %index - call void @llvm.aarch64.sme.st1q.horiz( %pg, i128* %base, i64 0, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, i128* %base, i64 15, i32 0) + %base = getelementptr i128, ptr %ptr, i64 %index + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %base, i64 0, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %base, i64 15, i32 0) ret void; } -define void @str(i8* %ptr) { +define void @str(ptr %ptr) { ; CHECK-LABEL: str: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: str za[w12, 0], [x0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.str(i32 0, i8* %ptr) + call void @llvm.aarch64.sme.str(i32 0, ptr %ptr) ret void; } -define void @str_with_off_15(i8* %ptr) { +define void @str_with_off_15(ptr %ptr) { ; CHECK-LABEL: str_with_off_15: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: add x8, x0, #15 ; CHECK-NEXT: str za[w12, 0], [x8] ; CHECK-NEXT: ret - %base = getelementptr i8, i8* %ptr, i64 15 - call void @llvm.aarch64.sme.str(i32 0, i8* %base) + %base = getelementptr i8, ptr %ptr, i64 15 + call void @llvm.aarch64.sme.str(i32 0, ptr %base) ret void; } -define void @str_with_off_15mulvl(i8* %ptr) { +define void @str_with_off_15mulvl(ptr %ptr) { ; CHECK-LABEL: str_with_off_15mulvl: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, wzr @@ -276,12 +276,12 @@ ; CHECK-NEXT: ret %vscale = call i64 @llvm.vscale.i64() %mulvl = mul i64 %vscale, 240 - %base = getelementptr i8, i8* %ptr, i64 %mulvl - call void @llvm.aarch64.sme.str(i32 0, i8* %base) + %base = getelementptr i8, ptr %ptr, i64 %mulvl + call void @llvm.aarch64.sme.str(i32 0, ptr %base) ret void; } -define void @str_with_off_16mulvl(i8* %ptr) { +define void @str_with_off_16mulvl(ptr %ptr) { ; CHECK-LABEL: str_with_off_16mulvl: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, wzr @@ -290,21 +290,21 @@ ; CHECK-NEXT: ret %vscale = call i64 @llvm.vscale.i64() %mulvl = mul i64 %vscale, 256 - %base = getelementptr i8, i8* %ptr, i64 %mulvl - call void @llvm.aarch64.sme.str(i32 0, i8* %base) + %base = getelementptr i8, ptr %ptr, i64 %mulvl + call void @llvm.aarch64.sme.str(i32 0, ptr %base) ret void; } -declare void @llvm.aarch64.sme.st1b.horiz(, i8*, i64, i32) -declare void @llvm.aarch64.sme.st1h.horiz(, i16*, i64, i32) -declare void @llvm.aarch64.sme.st1w.horiz(, i32*, i64, i32) -declare void @llvm.aarch64.sme.st1d.horiz(, i64*, i64, i32) -declare void @llvm.aarch64.sme.st1q.horiz(, i128*, i64, i32) -declare void @llvm.aarch64.sme.st1b.vert(, i8*, i64, i32) -declare void @llvm.aarch64.sme.st1h.vert(, i16*, i64, i32) -declare void @llvm.aarch64.sme.st1w.vert(, i32*, i64, i32) -declare void @llvm.aarch64.sme.st1d.vert(, i64*, i64, i32) -declare void @llvm.aarch64.sme.st1q.vert(, i128*, i64, i32) +declare void @llvm.aarch64.sme.st1b.horiz(, ptr, i64, i32) +declare void @llvm.aarch64.sme.st1h.horiz(, ptr, i64, i32) +declare void @llvm.aarch64.sme.st1w.horiz(, ptr, i64, i32) +declare void @llvm.aarch64.sme.st1d.horiz(, ptr, i64, i32) +declare void @llvm.aarch64.sme.st1q.horiz(, ptr, i64, i32) +declare void @llvm.aarch64.sme.st1b.vert(, ptr, i64, i32) +declare void @llvm.aarch64.sme.st1h.vert(, ptr, i64, i32) +declare void @llvm.aarch64.sme.st1w.vert(, ptr, i64, i32) +declare void @llvm.aarch64.sme.st1d.vert(, ptr, i64, i32) +declare void @llvm.aarch64.sme.st1q.vert(, ptr, i64, i32) -declare void @llvm.aarch64.sme.str(i32, i8*) +declare void @llvm.aarch64.sme.str(i32, ptr) declare i64 @llvm.vscale.i64()