diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -780,12 +780,6 @@
   class AdvSIMD_1Vec_PredLoad_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
                 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
-                 LLVMPointerTo<0>],
-                [IntrReadMem, IntrArgMemOnly]>;
-
-  class AdvSIMD_1Vec_PredFaultingLoad_Intrinsic
-    : Intrinsic<[llvm_anyvector_ty],
-                [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                  LLVMPointerToElt<0>],
                 [IntrReadMem, IntrArgMemOnly]>;
 
@@ -793,7 +787,7 @@
     : Intrinsic<[],
                 [llvm_anyvector_ty,
                  LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
-                 LLVMPointerTo<0>],
+                 LLVMPointerToElt<0>],
                 [IntrArgMemOnly, NoCapture<2>]>;
 
   class AdvSIMD_SVE_Index_Intrinsic
@@ -1289,9 +1283,8 @@
 //
 
 def int_aarch64_sve_ldnt1 : AdvSIMD_1Vec_PredLoad_Intrinsic;
-
-def int_aarch64_sve_ldnf1 : AdvSIMD_1Vec_PredFaultingLoad_Intrinsic;
-def int_aarch64_sve_ldff1 : AdvSIMD_1Vec_PredFaultingLoad_Intrinsic;
+def int_aarch64_sve_ldnf1 : AdvSIMD_1Vec_PredLoad_Intrinsic;
+def int_aarch64_sve_ldff1 : AdvSIMD_1Vec_PredLoad_Intrinsic;
 
 //
 // Stores
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8981,7 +8981,7 @@
   case Intrinsic::aarch64_sve_ldnt1: {
     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
     Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.memVT = MVT::getVT(I.getType());
    Info.ptrVal = I.getArgOperand(1);
     Info.offset = 0;
     Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
@@ -8991,7 +8991,7 @@
   case Intrinsic::aarch64_sve_stnt1: {
     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
     Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.memVT = MVT::getVT(I.getOperand(0)->getType());
    Info.ptrVal = I.getArgOperand(2);
     Info.offset = 0;
     Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
@@ -4,12 +4,12 @@
 ; LDNT1B
 ;
 
-define <vscale x 16 x i8> @ldnt1b_i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
+define <vscale x 16 x i8> @ldnt1b_i8(<vscale x 16 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: ldnt1b_i8:
 ; CHECK: ldnt1b { z0.b }, p0/z, [x0]
 ; CHECK-NEXT: ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %pred,
-                                                                 <vscale x 16 x i8>* %addr)
+                                                                 i8* %addr)
   ret <vscale x 16 x i8> %res
 }
 
@@ -17,21 +17,21 @@
 ; LDNT1H
 ;
 
-define <vscale x 8 x i16> @ldnt1h_i16(<vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
+define <vscale x 8 x i16> @ldnt1h_i16(<vscale x 8 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: ldnt1h_i16:
 ; CHECK: ldnt1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT: ret
   %res = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %pred,
-                                                                 <vscale x 8 x i16>* %addr)
+                                                                 i16* %addr)
   ret <vscale x 8 x i16> %res
 }
 
-define <vscale x 8 x half> @ldnt1h_f16(<vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
+define <vscale x 8 x half> @ldnt1h_f16(<vscale x 8 x i1> %pred, half* %addr) {
 ; CHECK-LABEL: ldnt1h_f16:
 ; CHECK: ldnt1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT: ret
   %res = call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> %pred,
-                                                                  <vscale x 8 x half>* %addr)
+                                                                  half* %addr)
   ret <vscale x 8 x half> %res
 }
 
@@ -39,21 +39,21 @@
 ; LDNT1W
 ;
 
-define <vscale x 4 x i32> @ldnt1w_i32(<vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
+define <vscale x 4 x i32> @ldnt1w_i32(<vscale x 4 x i1> %pred, i32* %addr) {
 ; CHECK-LABEL: ldnt1w_i32:
 ; CHECK: ldnt1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT: ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %pred,
-                                                                 <vscale x 4 x i32>* %addr)
+                                                                 i32* %addr)
   ret <vscale x 4 x i32> %res
 }
 
-define <vscale x 4 x float> @ldnt1w_f32(<vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
+define <vscale x 4 x float> @ldnt1w_f32(<vscale x 4 x i1> %pred, float* %addr) {
 ; CHECK-LABEL: ldnt1w_f32:
 ; CHECK: ldnt1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT: ret
   %res = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> %pred,
-                                                                   <vscale x 4 x float>* %addr)
+                                                                   float* %addr)
   ret <vscale x 4 x float> %res
 }
 
@@ -61,28 +61,28 @@
 ; LDNT1D
 ;
 
-define <vscale x 2 x i64> @ldnt1d_i64(<vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
+define <vscale x 2 x i64> @ldnt1d_i64(<vscale x 2 x i1> %pred, i64* %addr) {
 ; CHECK-LABEL: ldnt1d_i64:
 ; CHECK: ldnt1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT: ret
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %pred,
-                                                                 <vscale x 2 x i64>* %addr)
+                                                                 i64* %addr)
   ret <vscale x 2 x i64> %res
 }
 
-define <vscale x 2 x double> @ldnt1d_f64(<vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
+define <vscale x 2 x double> @ldnt1d_f64(<vscale x 2 x i1> %pred, double* %addr) {
 ; CHECK-LABEL: ldnt1d_f64:
 ; CHECK: ldnt1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT: ret
   %res = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> %pred,
-                                                                    <vscale x 2 x double>* %addr)
+                                                                    double* %addr)
   ret <vscale x 2 x double> %res
 }
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>*)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>*)
-declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>*)
-declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>*)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1>, i8*)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1>, i16*)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1>, i32*)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1>, i64*)
+declare <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1>, half*)
+declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1>, float*)
+declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1>, double*)
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -4,13 +4,13 @@
 ; STNT1B
 ;
 
-define void @stnt1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
+define void @stnt1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, i8* %addr) {
 ; CHECK-LABEL: stnt1b_i8:
 ; CHECK: stnt1b { z0.b }, p0, [x0]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data,
                                             <vscale x 16 x i1> %pred,
-                                            <vscale x 16 x i8>* %addr)
+                                            i8* %addr)
   ret void
 }
 
@@ -18,23 +18,23 @@
 ; STNT1H
 ;
 
-define void @stnt1h_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
+define void @stnt1h_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, i16* %addr) {
 ; CHECK-LABEL: stnt1h_i16:
 ; CHECK: stnt1h { z0.h }, p0, [x0]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data,
                                             <vscale x 8 x i1> %pred,
-                                            <vscale x 8 x i16>* %addr)
+                                            i16* %addr)
   ret void
 }
 
-define void @stnt1h_f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
+define void @stnt1h_f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, half* %addr) {
 ; CHECK-LABEL: stnt1h_f16:
 ; CHECK: stnt1h { z0.h }, p0, [x0]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> %data,
                                             <vscale x 8 x i1> %pred,
-                                            <vscale x 8 x half>* %addr)
+                                            half* %addr)
   ret void
 }
 
@@ -42,23 +42,23 @@
 ; STNT1W
 ;
 
-define void @stnt1w_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
+define void @stnt1w_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i32* %addr) {
 ; CHECK-LABEL: stnt1w_i32:
 ; CHECK: stnt1w { z0.s }, p0, [x0]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data,
                                             <vscale x 4 x i1> %pred,
-                                            <vscale x 4 x i32>* %addr)
+                                            i32* %addr)
   ret void
 }
 
-define void @stnt1w_f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
+define void @stnt1w_f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, float* %addr) {
 ; CHECK-LABEL: stnt1w_f32:
 ; CHECK: stnt1w { z0.s }, p0, [x0]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> %data,
                                             <vscale x 4 x i1> %pred,
-                                            <vscale x 4 x float>* %addr)
+                                            float* %addr)
   ret void
 }
 
@@ -66,30 +66,30 @@
 ; STNT1D
 ;
 
-define void @stnt1d_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
+define void @stnt1d_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i64* %addr) {
 ; CHECK-LABEL: stnt1d_i64:
 ; CHECK: stnt1d { z0.d }, p0, [x0]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data,
                                             <vscale x 2 x i1> %pred,
-                                            <vscale x 2 x i64>* %addr)
+                                            i64* %addr)
   ret void
 }
 
-define void @stnt1d_f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
+define void @stnt1d_f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, double* %addr) {
 ; CHECK-LABEL: stnt1d_f64:
 ; CHECK: stnt1d { z0.d }, p0, [x0]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> %data,
                                             <vscale x 2 x i1> %pred,
-                                            <vscale x 2 x double>* %addr)
+                                            double* %addr)
   ret void
 }
 
-declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>*)
-declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>*)
-declare void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>*)
-declare void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>*)
+declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
+declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
+declare void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
+declare void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
+declare void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half*)
+declare void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*)
+declare void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*)
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-imm.ll
--- a/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-imm.ll
@@ -15,12 +15,14 @@
 ; CHECK-NEXT: stnt1d { z[[DATA]].d }, p0, [x{{[0-9]+}}]
 ; CHECK-NEXT: ret
   %base_load = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 8
+  %base_load_bc = bitcast <vscale x 2 x i64>* %base_load to i64*
   %data = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %mask,
-                                                                  <vscale x 2 x i64>* %base_load)
+                                                                  i64* %base_load_bc)
   %base_store = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 -9
+  %base_store_bc = bitcast <vscale x 2 x i64>* %base_store to i64*
   call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data,
                                             <vscale x 2 x i1> %mask,
-                                            <vscale x 2 x i64>* %base_store)
+                                            i64* %base_store_bc)
   ret void
 }
 
@@ -33,12 +35,14 @@
 ; CHECK-NEXT: stnt1d { z[[DATA]].d }, p0, [x0, #-7, mul vl]
 ; CHECK-NEXT: ret
   %base_load = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 -8
+  %base_load_bc = bitcast <vscale x 2 x i64>* %base_load to i64*
   %data = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %mask,
-                                                                  <vscale x 2 x i64>* %base_load)
+                                                                  i64* %base_load_bc)
   %base_store = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 -7
+  %base_store_bc = bitcast <vscale x 2 x i64>* %base_store to i64*
   call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data,
                                             <vscale x 2 x i1> %mask,
-                                            <vscale x 2 x i64>* %base_store)
+                                            i64* %base_store_bc)
   ret void
 }
 
@@ -48,12 +52,14 @@
 ; CHECK-NEXT: stnt1d { z[[DATA]].d }, p0, [x0, #-5, mul vl]
 ; CHECK-NEXT: ret
   %base_load = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %base, i64 -6
+  %base_load_bc = bitcast <vscale x 2 x double>* %base_load to double*
   %data = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> %mask,
-                                                                     <vscale x 2 x double>* %base_load)
+                                                                     double* %base_load_bc)
   %base_store = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %base, i64 -5
+  %base_store_bc = bitcast <vscale x 2 x double>* %base_store to double*
   call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> %data,
                                             <vscale x 2 x i1> %mask,
-                                            <vscale x 2 x double>* %base_store)
+                                            double* %base_store_bc)
   ret void
 }
 
@@ -65,12 +71,14 @@
 ; CHECK-NEXT: stnt1w { z[[DATA]].s }, p0, [x0, #7, mul vl]
 ; CHECK-NEXT: ret
   %base_load = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %base, i64 6
+  %base_load_bc = bitcast <vscale x 4 x i32>* %base_load to i32*
   %data = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %mask,
-                                                                  <vscale x 4 x i32>* %base_load)
+                                                                  i32* %base_load_bc)
   %base_store = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %base, i64 7
+  %base_store_bc = bitcast <vscale x 4 x i32>* %base_store to i32*
   call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data,
                                             <vscale x 4 x i1> %mask,
-                                            <vscale x 4 x i32>* %base_store)
+                                            i32* %base_store_bc)
   ret void
 }
 
@@ -80,12 +88,14 @@
 ; CHECK-NEXT: stnt1w { z[[DATA]].s }, p0, [x0, #2, mul vl]
 ; CHECK-NEXT: ret
   %base_load = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %base, i64 -1
+  %base_load_bc = bitcast <vscale x 4 x float>* %base_load to float*
   %data = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> %mask,
-                                                                    <vscale x 4 x float>* %base_load)
+                                                                    float* %base_load_bc)
   %base_store = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %base, i64 2
+  %base_store_bc = bitcast <vscale x 4 x float>* %base_store to float*
   call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> %data,
                                             <vscale x 4 x i1> %mask,
-                                            <vscale x 4 x float>* %base_store)
+                                            float* %base_store_bc)
   ret void
 }
 
@@ -98,12 +108,14 @@
 ; CHECK-NEXT: stnt1h { z[[DATA]].h }, p0, [x0, #7, mul vl]
 ; CHECK-NEXT: ret
   %base_load = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %base, i64 6
+  %base_load_bc = bitcast <vscale x 8 x i16>* %base_load to i16*
   %data = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %mask,
-                                                                  <vscale x 8 x i16>* %base_load)
+                                                                  i16* %base_load_bc)
   %base_store = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %base, i64 7
+  %base_store_bc = bitcast <vscale x 8 x i16>* %base_store to i16*
   call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data,
                                             <vscale x 8 x i1> %mask,
-                                            <vscale x 8 x i16>* %base_store)
+                                            i16* %base_store_bc)
   ret void
 }
 
@@ -113,12 +125,14 @@
 ; CHECK-NEXT: stnt1h { z[[DATA]].h }, p0, [x0, #2, mul vl]
 ; CHECK-NEXT: ret
   %base_load = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %base, i64 -1
+  %base_load_bc = bitcast <vscale x 8 x half>* %base_load to half*
   %data = call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> %mask,
-                                                                   <vscale x 8 x half>* %base_load)
+                                                                   half* %base_load_bc)
   %base_store = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %base, i64 2
+  %base_store_bc = bitcast <vscale x 8 x half>* %base_store to half*
   call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> %data,
                                             <vscale x 8 x i1> %mask,
-                                            <vscale x 8 x half>* %base_store)
+                                            half* %base_store_bc)
   ret void
 }
 
@@ -130,42 +144,44 @@
 ; CHECK-NEXT: stnt1b { z[[DATA]].b }, p0, [x0, #7, mul vl]
 ; CHECK-NEXT: ret
   %base_load = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base, i64 6
+  %base_load_bc = bitcast <vscale x 16 x i8>* %base_load to i8*
   %data = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %mask,
-                                                                  <vscale x 16 x i8>* %base_load)
+                                                                  i8* %base_load_bc)
   %base_store = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base, i64 7
+  %base_store_bc = bitcast <vscale x 16 x i8>* %base_store to i8*
   call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data,
                                             <vscale x 16 x i1> %mask,
-                                            <vscale x 16 x i8>* %base_store)
+                                            i8* %base_store_bc)
   ret void
 }
 
 ; 2-element non-temporal loads.
-declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>*)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1>, i64*)
+declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1>, double*)
 
 ; 4-element non-temporal loads.
-declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>*)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1>, i32*)
+declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1>, float*)
 
 ; 8-element non-temporal loads.
-declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>*)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1>, i16*)
+declare <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1>, half*)
 
 ; 16-element non-temporal loads.
-declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>*)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1>, i8*)
 
 ; 2-element non-temporal stores.
-declare void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>*)
+declare void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
+declare void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*)
 
 ; 4-element non-temporal stores.
-declare void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>*)
+declare void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
+declare void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*)
 
 ; 8-element non-temporal stores.
-declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>*)
+declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
+declare void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half*)
 
 ; 16-element non-temporal stores.
-declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>*)
+declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-reg.ll
--- a/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-reg.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-reg.ll
@@ -7,13 +7,12 @@
 ; CHECK-NEXT: ldnt1d { z[[DATA:[0-9]+]].d }, p0/z, [x0, x1, lsl #3]
 ; CHECK-NEXT: stnt1d { z[[DATA]].d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT: ret
-  %base_i64 = getelementptr i64, i64* %base, i64 %offset
-  %base_addr = bitcast i64* %base_i64 to <vscale x 2 x i64>*
+  %gep = getelementptr i64, i64* %base, i64 %offset
   %data = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %mask,
-                                                                  <vscale x 2 x i64>* %base_addr)
+                                                                  i64* %gep)
   call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data,
                                             <vscale x 2 x i1> %mask,
-                                            <vscale x 2 x i64>* %base_addr)
+                                            i64* %gep)
   ret void
 }
 
@@ -22,13 +21,12 @@
 ; CHECK-NEXT: ldnt1d { z[[DATA:[0-9]+]].d }, p0/z, [x0, x1, lsl #3]
 ; CHECK-NEXT: stnt1d { z[[DATA]].d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT: ret
-  %base_double = getelementptr double, double* %base, i64 %offset
-  %base_addr = bitcast double* %base_double to <vscale x 2 x double>*
+  %gep = getelementptr double, double* %base, i64 %offset
   %data = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> %mask,
-                                                                     <vscale x 2 x double>* %base_addr)
+                                                                     double* %gep)
   call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> %data,
                                             <vscale x 2 x i1> %mask,
-                                            <vscale x 2 x double>* %base_addr)
+                                            double* %gep)
   ret void
 }
 
@@ -39,13 +37,12 @@
 ; CHECK-NEXT: ldnt1w { z[[DATA:[0-9]+]].s }, p0/z, [x0, x1, lsl #2]
 ; CHECK-NEXT: stnt1w { z[[DATA]].s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT: ret
-  %base_i32 = getelementptr i32, i32* %base, i64 %offset
-  %base_addr = bitcast i32* %base_i32 to <vscale x 4 x i32>*
+  %gep = getelementptr i32, i32* %base, i64 %offset
   %data = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %mask,
-                                                                  <vscale x 4 x i32>* %base_addr)
+                                                                  i32* %gep)
   call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data,
                                             <vscale x 4 x i1> %mask,
-                                            <vscale x 4 x i32>* %base_addr)
+                                            i32* %gep)
   ret void
 }
 
@@ -54,13 +51,12 @@
 ; CHECK-NEXT: ldnt1w { z[[DATA:[0-9]+]].s }, p0/z, [x0, x1, lsl #2]
 ; CHECK-NEXT: stnt1w { z[[DATA]].s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT: ret
-  %base_float = getelementptr float, float* %base, i64 %offset
-  %base_addr = bitcast float* %base_float to <vscale x 4 x float>*
+  %gep = getelementptr float, float* %base, i64 %offset
   %data = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> %mask,
-                                                                    <vscale x 4 x float>* %base_addr)
+                                                                    float* %gep)
   call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> %data,
                                             <vscale x 4 x i1> %mask,
-                                            <vscale x 4 x float>* %base_addr)
+                                            float* %gep)
   ret void
 }
 
@@ -72,13 +68,12 @@
 ; CHECK-NEXT: ldnt1h { z[[DATA:[0-9]+]].h }, p0/z, [x0, x1, lsl #1]
 ; CHECK-NEXT: stnt1h { z[[DATA]].h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT: ret
-  %base_i16 = getelementptr i16, i16* %base, i64 %offset
-  %base_addr = bitcast i16* %base_i16 to <vscale x 8 x i16>*
+  %gep = getelementptr i16, i16* %base, i64 %offset
   %data = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %mask,
-                                                                  <vscale x 8 x i16>* %base_addr)
+                                                                  i16* %gep)
   call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data,
                                             <vscale x 8 x i1> %mask,
-                                            <vscale x 8 x i16>* %base_addr)
+                                            i16* %gep)
   ret void
 }
 
@@ -87,13 +82,12 @@
 ; CHECK-NEXT: ldnt1h { z[[DATA:[0-9]+]].h }, p0/z, [x0, x1, lsl #1]
 ; CHECK-NEXT: stnt1h { z[[DATA]].h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT: ret
-  %base_half = getelementptr half, half* %base, i64 %offset
-  %base_addr = bitcast half* %base_half to <vscale x 8 x half>*
+  %gep = getelementptr half, half* %base, i64 %offset
   %data = call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> %mask,
-                                                                   <vscale x 8 x half>* %base_addr)
+                                                                   half* %gep)
   call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> %data,
                                             <vscale x 8 x i1> %mask,
-                                            <vscale x 8 x half>* %base_addr)
+                                            half* %gep)
   ret void
 }
 
@@ -104,42 +98,41 @@
 ; CHECK-NEXT: ldnt1b { z[[DATA:[0-9]+]].b }, p0/z, [x0, x1]
 ; CHECK-NEXT: stnt1b { z[[DATA]].b }, p0, [x0, x1]
 ; CHECK-NEXT: ret
-  %base_i8 = getelementptr i8, i8* %base, i64 %offset
-  %base_addr = bitcast i8* %base_i8 to <vscale x 16 x i8>*
+  %gep = getelementptr i8, i8* %base, i64 %offset
   %data = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %mask,
-                                                                  <vscale x 16 x i8>* %base_addr)
+                                                                  i8* %gep)
   call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data,
                                             <vscale x 16 x i1> %mask,
-                                            <vscale x 16 x i8>* %base_addr)
+                                            i8* %gep)
   ret void
 }
 
 ; 2-element non-temporal loads.
-declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>*)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1>, i64*)
+declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1>, double*)
 
 ; 4-element non-temporal loads.
-declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>*)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1>, i32*)
+declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1>, float*)
 
 ; 8-element non-temporal loads.
-declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>*)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1>, i16*)
+declare <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1>, half*)
 
 ; 16-element non-temporal loads.
-declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>*)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1>, i8*)
 
 ; 2-element non-temporal stores.
-declare void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>*)
+declare void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
+declare void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*)
 
 ; 4-element non-temporal stores.
-declare void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>*)
+declare void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
+declare void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*)
 
 ; 8-element non-temporal stores.
-declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>*)
+declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
+declare void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half*)
 
 ; 16-element non-temporal stores.
-declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>*)
+declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
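
For reference, a minimal caller sketch (not part of the patch; the function name @example_ldnt1_i64 is hypothetical) showing the element-pointer signature that the tests above exercise: the pointer operand is now the element type (i64*), so no bitcast to a scalable-vector pointer is required before the call.

; Illustrative sketch only: pass the element pointer straight to the intrinsic.
define <vscale x 2 x i64> @example_ldnt1_i64(<vscale x 2 x i1> %pg, i64* %ptr) {
  %v = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %pg, i64* %ptr)
  ret <vscale x 2 x i64> %v
}
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1>, i64*)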