diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1232,6 +1232,38 @@
 def int_aarch64_sve_stnt1 : AdvSIMD_1Vec_PredStore_Intrinsic;
 
 //
+// Prefetches
+//
+
+def int_aarch64_sve_prf : Intrinsic<[], [llvm_anyvector_ty,
+                                         llvm_ptr_ty,
+                                         llvm_i32_ty], [IntrArgMemOnly]>;
+def int_aarch64_sve_prfb_gather : Intrinsic<[],
+                                            [llvm_anyvector_ty,
+                                             LLVMPointerType<llvm_i8_ty>,
+                                             LLVMScalarOrSameVectorWidth<0,llvm_i64_ty>,
+                                             llvm_i32_ty],
+                                            [IntrArgMemOnly]>;
+def int_aarch64_sve_prfh_gather : Intrinsic<[],
+                                            [llvm_anyvector_ty,
+                                             LLVMPointerType<llvm_i8_ty>,
+                                             LLVMScalarOrSameVectorWidth<0,llvm_i64_ty>,
+                                             llvm_i32_ty],
+                                            [IntrArgMemOnly]>;
+def int_aarch64_sve_prfw_gather : Intrinsic<[],
+                                            [llvm_anyvector_ty,
+                                             LLVMPointerType<llvm_i8_ty>,
+                                             LLVMScalarOrSameVectorWidth<0,llvm_i64_ty>,
+                                             llvm_i32_ty],
+                                            [IntrArgMemOnly]>;
+def int_aarch64_sve_prfd_gather : Intrinsic<[],
+                                            [llvm_anyvector_ty,
+                                             LLVMPointerType<llvm_i8_ty>,
+                                             LLVMScalarOrSameVectorWidth<0,llvm_i64_ty>,
+                                             llvm_i32_ty],
+                                            [IntrArgMemOnly]>;
+
+//
 // Scalar to vector operations
 //
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -284,7 +284,17 @@
   LDP,
   STP,
-  STNP
+  STNP,
+
+  // SVE prefetch
+  GPRF_S_IMM,
+  GPRF_D_IMM,
+  GPRF_D_SCALED,
+  GPRF_S_SXTW_SCALED,
+  GPRF_S_UXTW_SCALED,
+  GPRF_D_SXTW_SCALED,
+  GPRF_D_UXTW_SCALED,
+
 };
 
 } // end namespace AArch64ISD
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1426,6 +1426,13 @@
   case AArch64ISD::STP:                return "AArch64ISD::STP";
   case AArch64ISD::STNP:               return "AArch64ISD::STNP";
   case AArch64ISD::DUP_PRED:           return "AArch64ISD::DUP_PRED";
+  case AArch64ISD::GPRF_S_IMM:         return "AArch64ISD::GPRF_S_IMM";
+  case AArch64ISD::GPRF_D_IMM:         return "AArch64ISD::GPRF_D_IMM";
+  case AArch64ISD::GPRF_D_SCALED:      return "AArch64ISD::GPRF_D_SCALED";
+  case AArch64ISD::GPRF_S_SXTW_SCALED: return "AArch64ISD::GPRF_S_SXTW_SCALED";
+  case AArch64ISD::GPRF_S_UXTW_SCALED: return "AArch64ISD::GPRF_S_UXTW_SCALED";
+  case AArch64ISD::GPRF_D_SXTW_SCALED: return "AArch64ISD::GPRF_D_SXTW_SCALED";
+  case AArch64ISD::GPRF_D_UXTW_SCALED: return "AArch64ISD::GPRF_D_UXTW_SCALED";
   }
   return nullptr;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -10,6 +10,19 @@
 //
 //===----------------------------------------------------------------------===//
 
+def SDT_AArch64_GPRF : SDTypeProfile< 0, 5, [
+  SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisInt<3>,
+  SDTCVecEltisVT<0,i1>, SDTCisSameNumEltsAs<0, 2>
+]>;
+
+def AArch64prf_gather_s_imm         : SDNode<"AArch64ISD::GPRF_S_IMM",         SDT_AArch64_GPRF, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64prf_gather_d_imm         : SDNode<"AArch64ISD::GPRF_D_IMM",         SDT_AArch64_GPRF, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64prf_gather_d_scaled      : SDNode<"AArch64ISD::GPRF_D_SCALED",      SDT_AArch64_GPRF, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64prf_gather_s_sxtw_scaled : SDNode<"AArch64ISD::GPRF_S_SXTW_SCALED", SDT_AArch64_GPRF, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64prf_gather_s_uxtw_scaled : SDNode<"AArch64ISD::GPRF_S_UXTW_SCALED", SDT_AArch64_GPRF, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64prf_gather_d_sxtw_scaled : SDNode<"AArch64ISD::GPRF_D_SXTW_SCALED", SDT_AArch64_GPRF, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64prf_gather_d_uxtw_scaled : SDNode<"AArch64ISD::GPRF_D_UXTW_SCALED", SDT_AArch64_GPRF, [SDNPHasChain, SDNPMayLoad]>;
+
 def SDT_AArch64_LDNF1 : SDTypeProfile<1, 3, [
   SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>,
   SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
@@ -757,37 +770,37 @@
   // Gather prefetch using scaled 32-bit offsets, e.g.
   //    prfh pldl1keep, p0, [x0, z0.s, uxtw #1]
-  defm PRFB_S : sve_mem_32b_prfm_sv_scaled<0b00, "prfb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
-  defm PRFH_S : sve_mem_32b_prfm_sv_scaled<0b01, "prfh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
-  defm PRFW_S : sve_mem_32b_prfm_sv_scaled<0b10, "prfw", ZPR32ExtSXTW32, ZPR32ExtUXTW32>;
-  defm PRFD_S : sve_mem_32b_prfm_sv_scaled<0b11, "prfd", ZPR32ExtSXTW64, ZPR32ExtUXTW64>;
+  defm PRFB_S : sve_mem_32b_prfm_sv_scaled<0b00, "prfb", AArch64prf_gather_s_sxtw_scaled, AArch64prf_gather_s_uxtw_scaled, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, i8>;
+  defm PRFH_S : sve_mem_32b_prfm_sv_scaled<0b01, "prfh", AArch64prf_gather_s_sxtw_scaled, AArch64prf_gather_s_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, i16>;
+  defm PRFW_S : sve_mem_32b_prfm_sv_scaled<0b10, "prfw", AArch64prf_gather_s_sxtw_scaled, AArch64prf_gather_s_uxtw_scaled, ZPR32ExtSXTW32, ZPR32ExtUXTW32, i32>;
+  defm PRFD_S : sve_mem_32b_prfm_sv_scaled<0b11, "prfd", AArch64prf_gather_s_sxtw_scaled, AArch64prf_gather_s_uxtw_scaled, ZPR32ExtSXTW64, ZPR32ExtUXTW64, i64>;
 
   // Gather prefetch using unpacked, scaled 32-bit offsets, e.g.
   //    prfh pldl1keep, p0, [x0, z0.d, uxtw #1]
-  defm PRFB_D : sve_mem_64b_prfm_sv_ext_scaled<0b00, "prfb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
-  defm PRFH_D : sve_mem_64b_prfm_sv_ext_scaled<0b01, "prfh", ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
-  defm PRFW_D : sve_mem_64b_prfm_sv_ext_scaled<0b10, "prfw", ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
-  defm PRFD_D : sve_mem_64b_prfm_sv_ext_scaled<0b11, "prfd", ZPR64ExtSXTW64, ZPR64ExtUXTW64>;
+  defm PRFB_D : sve_mem_64b_prfm_sv_ext_scaled<0b00, "prfb", AArch64prf_gather_d_sxtw_scaled, AArch64prf_gather_d_uxtw_scaled, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, i8>;
+  defm PRFH_D : sve_mem_64b_prfm_sv_ext_scaled<0b01, "prfh", AArch64prf_gather_d_sxtw_scaled, AArch64prf_gather_d_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, i16>;
+  defm PRFW_D : sve_mem_64b_prfm_sv_ext_scaled<0b10, "prfw", AArch64prf_gather_d_sxtw_scaled, AArch64prf_gather_d_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, i32>;
+  defm PRFD_D : sve_mem_64b_prfm_sv_ext_scaled<0b11, "prfd", AArch64prf_gather_d_sxtw_scaled, AArch64prf_gather_d_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, i64>;
 
   // Gather prefetch using scaled 64-bit offsets, e.g.
   //    prfh pldl1keep, p0, [x0, z0.d, lsl #1]
-  defm PRFB_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b00, "prfb", ZPR64ExtLSL8>;
-  defm PRFH_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b01, "prfh", ZPR64ExtLSL16>;
-  defm PRFW_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b10, "prfw", ZPR64ExtLSL32>;
-  defm PRFD_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b11, "prfd", ZPR64ExtLSL64>;
+  defm PRFB_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b00, "prfb", AArch64prf_gather_d_scaled, ZPR64ExtLSL8, i8>;
+  defm PRFH_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b01, "prfh", AArch64prf_gather_d_scaled, ZPR64ExtLSL16, i16>;
+  defm PRFW_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b10, "prfw", AArch64prf_gather_d_scaled, ZPR64ExtLSL32, i32>;
+  defm PRFD_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b11, "prfd", AArch64prf_gather_d_scaled, ZPR64ExtLSL64, i64>;
 
   // Gather prefetch using 32/64-bit pointers with offset, e.g.
   //    prfh pldl1keep, p0, [z0.s, #16]
   //    prfh pldl1keep, p0, [z0.d, #16]
-  defm PRFB_S_PZI : sve_mem_32b_prfm_vi<0b00, "prfb", imm0_31>;
-  defm PRFH_S_PZI : sve_mem_32b_prfm_vi<0b01, "prfh", uimm5s2>;
-  defm PRFW_S_PZI : sve_mem_32b_prfm_vi<0b10, "prfw", uimm5s4>;
-  defm PRFD_S_PZI : sve_mem_32b_prfm_vi<0b11, "prfd", uimm5s8>;
-
-  defm PRFB_D_PZI : sve_mem_64b_prfm_vi<0b00, "prfb", imm0_31>;
-  defm PRFH_D_PZI : sve_mem_64b_prfm_vi<0b01, "prfh", uimm5s2>;
-  defm PRFW_D_PZI : sve_mem_64b_prfm_vi<0b10, "prfw", uimm5s4>;
-  defm PRFD_D_PZI : sve_mem_64b_prfm_vi<0b11, "prfd", uimm5s8>;
+  defm PRFB_S_PZI : sve_mem_32b_prfm_vi<0b00, "prfb", imm0_31, AArch64prf_gather_s_imm, i8 >;
+  defm PRFH_S_PZI : sve_mem_32b_prfm_vi<0b01, "prfh", uimm5s2, AArch64prf_gather_s_imm, i16>;
+  defm PRFW_S_PZI : sve_mem_32b_prfm_vi<0b10, "prfw", uimm5s4, AArch64prf_gather_s_imm, i32>;
+  defm PRFD_S_PZI : sve_mem_32b_prfm_vi<0b11, "prfd", uimm5s8, AArch64prf_gather_s_imm, i64>;
+
+  defm PRFB_D_PZI : sve_mem_64b_prfm_vi<0b00, "prfb", imm0_31, AArch64prf_gather_d_imm, i8 >;
+  defm PRFH_D_PZI : sve_mem_64b_prfm_vi<0b01, "prfh", uimm5s2, AArch64prf_gather_d_imm, i16>;
+  defm PRFW_D_PZI : sve_mem_64b_prfm_vi<0b10, "prfw", uimm5s4, AArch64prf_gather_d_imm, i32>;
+  defm PRFD_D_PZI : sve_mem_64b_prfm_vi<0b11, "prfd", uimm5s8, AArch64prf_gather_d_imm, i64>;
 
   defm ADR_SXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_sxtw<0b00, "adr">;
   defm ADR_UXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_uxtw<0b01, "adr">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -6201,10 +6201,18 @@
 }
 
 multiclass sve_mem_32b_prfm_sv_scaled<bits<2> msz, string asm,
+                                      SDPatternOperator sxtw_op,
+                                      SDPatternOperator uxtw_op,
                                       RegisterOperand sxtw_opnd,
-                                      RegisterOperand uxtw_opnd> {
+                                      RegisterOperand uxtw_opnd,
+                                      ValueType vt> {
   def _UXTW_SCALED : sve_mem_32b_prfm_sv<msz, 0, asm, uxtw_opnd>;
   def _SXTW_SCALED : sve_mem_32b_prfm_sv<msz, 1, asm, sxtw_opnd>;
+
+  def : Pat<(uxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$indices), (i32 sve_prfop:$prfop), vt),
+            (!cast<Instruction>(NAME # _UXTW_SCALED) sve_prfop:$prfop, PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
+  def : Pat<(sxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$indices), (i32 sve_prfop:$prfop), vt),
+            (!cast<Instruction>(NAME # _SXTW_SCALED) sve_prfop:$prfop, PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
 }
 
 class sve_mem_32b_prfm_vi<bits<2> msz, string asm, Operand imm_ty>
@@ -6227,9 +6235,13 @@
   let Inst{3-0} = prfop;
 }
 
-multiclass sve_mem_32b_prfm_vi<bits<2> msz, string asm, Operand imm_ty> {
+multiclass sve_mem_32b_prfm_vi<bits<2> msz, string asm, Operand imm_ty,
+                               SDPatternOperator prefetch, ValueType vt> {
   def NAME : sve_mem_32b_prfm_vi<msz, asm, imm_ty>;
+  def : Pat<(prefetch (nxv4i1 PPR:$gp), (i64 imm_ty:$imm5), (nxv4i32 ZPR:$indices), (i32 sve_prfop:$prfop), vt),
+            (!cast<Instruction>(NAME) sve_prfop:$prfop, PPR:$gp, ZPR:$indices, imm_ty:$imm5)>;
+
   def : InstAlias<asm # "\t$prfop, $Pg, [$Zn]",
                   (!cast<Instruction>(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>;
 }
@@ -6484,15 +6496,29 @@
 }
 
 multiclass sve_mem_64b_prfm_sv_ext_scaled<bits<2> msz, string asm,
+                                          SDPatternOperator sxtw_op,
+                                          SDPatternOperator uxtw_op,
                                           RegisterOperand sxtw_opnd,
-                                          RegisterOperand uxtw_opnd> {
+                                          RegisterOperand uxtw_opnd,
+                                          ValueType vt> {
   def _UXTW_SCALED : sve_mem_64b_prfm_sv<msz, 0, 0, asm, uxtw_opnd>;
   def _SXTW_SCALED : sve_mem_64b_prfm_sv<msz, 1, 0, asm, sxtw_opnd>;
+
+  def : Pat<(uxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), (i32 sve_prfop:$prfop), vt),
+            (!cast<Instruction>(NAME # _UXTW_SCALED) sve_prfop:$prfop, PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
+  def : Pat<(sxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), (i32 sve_prfop:$prfop), vt),
+            (!cast<Instruction>(NAME # _SXTW_SCALED) sve_prfop:$prfop, PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
 }
 
 multiclass sve_mem_64b_prfm_sv_lsl_scaled<bits<2> msz, string asm,
-                                          RegisterOperand zprext> {
+                                          SDPatternOperator op,
+                                          RegisterOperand zprext, ValueType vt> {
   def NAME : sve_mem_64b_prfm_sv<msz, 1, 1, asm, zprext>;
+
+  def : Pat<(op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices),
+                (i32 sve_prfop:$prfop), vt),
+            (!cast<Instruction>(NAME) sve_prfop:$prfop, PPR:$gp, GPR64sp:$base,
+                                      ZPR:$indices)>;
 }
@@ -6518,14 +6544,17 @@
   let hasSideEffects = 1;
 }
 
-multiclass sve_mem_64b_prfm_vi<bits<2> msz, string asm, Operand imm_ty> {
+multiclass sve_mem_64b_prfm_vi<bits<2> msz, string asm, Operand imm_ty,
+                               SDPatternOperator prefetch, ValueType vt> {
   def NAME : sve_mem_64b_prfm_vi<msz, asm, imm_ty>;
+  def : Pat<(prefetch (nxv2i1 PPR:$gp), (i64 imm_ty:$imm5), (nxv2i64 ZPR:$indices), (i32 sve_prfop:$prfop), vt),
+            (!cast<Instruction>(NAME) sve_prfop:$prfop, PPR:$gp, ZPR:$indices, imm_ty:$imm5)>;
+
   def : InstAlias<asm # "\t$prfop, $Pg, [$Zn]",
                   (!cast<Instruction>(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>;
 }
 
-
 //===----------------------------------------------------------------------===//
 // SVE Compute Vector Address Group
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-prefetches.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-prefetches.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-prefetches.ll
@@ -0,0 +1,823 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; Testing prfop encodings
+;
+define void @test_svprf_pldl1strm(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pldl1strm
+; CHECK: prfb pldl1strm, p0, [x0]
+entry:
+  tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 1)
+  ret void
+}
+
+define void @test_svprf_pldl2keep(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pldl2keep
+; CHECK: prfb pldl2keep, p0, [x0]
+entry:
+  tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 2)
+  ret void
+}
+
+define void @test_svprf_pldl2strm(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pldl2strm
+; CHECK: prfb pldl2strm, p0, [x0]
+entry:
+  tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 3)
+  ret void
+}
+
+define void @test_svprf_pldl3keep(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pldl3keep
+; CHECK: prfb pldl3keep, p0, [x0]
+entry:
+  tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 4)
+  ret void
+}
+
+define void @test_svprf_pldl3strm(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pldl3strm
+; CHECK: prfb pldl3strm, p0, [x0]
+entry:
+  tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 5)
+  ret void
+}
+
+define void @test_svprf_pstl1keep(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pstl1keep
+; CHECK: prfb pstl1keep, p0, [x0]
+entry:
+  tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 8)
+  ret void
+}
+
+define void @test_svprf_pstl1strm(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pstl1strm
+; CHECK: prfb pstl1strm, p0, [x0]
+entry:
+  tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 9)
+  ret void
+}
+
+define void @test_svprf_pstl2keep(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pstl2keep
+; CHECK: prfb pstl2keep, p0, [x0]
+entry:
+  tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 10)
+  ret void
+}
+
+define void @test_svprf_pstl2strm(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pstl2strm
+; CHECK: prfb pstl2strm, p0, [x0]
+entry:
+  tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 11)
+  ret void
+}
+
+define void @test_svprf_pstl3keep(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pstl3keep
+; CHECK: prfb pstl3keep, p0, [x0]
+entry:
+  tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 12)
+  ret void
+}
+
+define void @test_svprf_pstl3strm(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprf_pstl3strm
+; CHECK: prfb pstl3strm, p0, [x0]
+entry:
+  tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 13)
+  ret void
+}
+
+;
+; Testing imm limits of SI form
+;
+
+define void @test_svprf_vnum_under(<vscale x 16 x i1> %pg, <vscale x 16 x i8>* %base) {
+; CHECK-LABEL: test_svprf_vnum_under
+; CHECK-NOT: prfb pstl3strm, p0, [x0, #-33, mul vl]
+entry:
+  %gep = getelementptr inbounds <vscale x 16 x i8>, <vscale x 16 x i8>* %base, i64 -33, i64 0
+  tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %gep, i32 13)
+  ret void
+}
+
+define void @test_svprf_vnum_min(<vscale x 16 x i1> %pg, <vscale x 16 x i8>* %base) {
+; CHECK-LABEL: test_svprf_vnum_min
+; CHECK: prfb pstl3strm, p0, [x0, #-32, mul vl]
+entry:
+  %gep = getelementptr inbounds <vscale x 16 x i8>, <vscale x 16 x i8>* %base, i64 -32, i64 0
+  tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %gep, i32 13)
+  ret void
+}
+
+define void @test_svprf_vnum_over(<vscale x 16 x i1> %pg, <vscale x 16 x i8>* %base) {
+; CHECK-LABEL: test_svprf_vnum_over
+; CHECK-NOT: prfb pstl3strm, p0, [x0, #32, mul vl]
+entry:
+  %gep = getelementptr inbounds <vscale x 16 x i8>, <vscale x 16 x i8>* %base, i64 32, i64 0
+  tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %gep, i32 13)
+  ret void
+}
+
+define void @test_svprf_vnum_max(<vscale x 16 x i1> %pg, <vscale x 16 x i8>* %base) {
+; CHECK-LABEL: test_svprf_vnum_max
+; CHECK: prfb pstl3strm, p0, [x0, #31, mul vl]
+entry:
+  %gep = getelementptr inbounds <vscale x 16 x i8>, <vscale x 16 x i8>* %base, i64 31, i64 0
+  tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %gep, i32 13)
+  ret void
+}
+
+;
+; scalar contiguous
+;
+
+define void @test_svprfb(<vscale x 16 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprfb
+; CHECK: prfb pldl1keep, p0, [x0]
+entry:
+  tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %base, i32 0)
+  ret void
+}
+
+define void @test_svprfh(<vscale x 8 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprfh
+; CHECK: prfh pldl1keep, p0, [x0]
+entry:
+  tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> %pg, i8* %base, i32 0)
+  ret void
+}
+
+define void @test_svprfw(<vscale x 4 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprfw
+; CHECK: prfw pldl1keep, p0, [x0]
+entry:
+  tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> %pg, i8* %base, i32 0)
+  ret void
+}
+
+define void @test_svprfd(<vscale x 2 x i1> %pg, i8* %base) {
+; CHECK-LABEL: test_svprfd
+; CHECK: prfd pldl1keep, p0, [x0]
+entry:
+  tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %pg, i8* %base, i32 0)
+  ret void
+}
+
+;
+; scalar + imm contiguous
+;
+; imm form of prfb is tested above
+
+define void @test_svprfh_vnum(<vscale x 8 x i1> %pg, <vscale x 8 x i16>* %base) {
+; CHECK-LABEL: test_svprfh_vnum
+; CHECK: prfh pstl3strm, p0, [x0, #31, mul vl]
+entry:
+  %gep = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %base, i64 31
+  %addr = bitcast <vscale x 8 x i16>* %gep to i8*
+  tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> %pg, i8* %addr, i32 13)
+  ret void
+}
+
+define void @test_svprfw_vnum(<vscale x 4 x i1> %pg, <vscale x 4 x i32>* %base) {
+; CHECK-LABEL: test_svprfw_vnum
+; CHECK: prfw pstl3strm, p0, [x0, #31, mul vl]
+entry:
+  %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %base, i64 31
+  %addr = bitcast <vscale x 4 x i32>* %gep to i8*
+  tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> %pg, i8* %addr, i32 13)
+  ret void
+}
+
+define void @test_svprfd_vnum(<vscale x 2 x i1> %pg, <vscale x 2 x i64>* %base) {
+; CHECK-LABEL: test_svprfd_vnum
+; CHECK: prfd pstl3strm, p0, [x0, #31, mul vl]
+entry:
+  %gep = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 31
+  %addr = bitcast <vscale x 2 x i64>* %gep to i8*
+  tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %pg, i8* %addr, i32 13)
+  ret void
+}
+
+;
+; scalar + scaled scalar contiguous
+;
+
+define void @test_svprfb_ss(<vscale x 16 x i1> %pg, i8* %base, i64 %offset) {
+; CHECK-LABEL: test_svprfb_ss
+; CHECK: prfb pstl3strm, p0, [x0, x1]
+entry:
+  %addr = getelementptr i8, i8* %base, i64 %offset
+  tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %addr, i32 13)
+  ret void
+}
+
+define void @test_svprfh_ss(<vscale x 8 x i1> %pg, i16* %base, i64 %offset) {
+; CHECK-LABEL: test_svprfh_ss
+; CHECK: prfh pstl3strm, p0, [x0, x1, lsl #1]
+entry:
+  %gep = getelementptr i16, i16* %base, i64 %offset
+  %addr = bitcast i16* %gep to i8*
+  tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> %pg, i8* %addr, i32 13)
+  ret void
+}
+
+define void @test_svprfw_ss(<vscale x 4 x i1> %pg, i32* %base, i64 %offset) {
+; CHECK-LABEL: test_svprfw_ss
+; CHECK: prfw pstl3strm, p0, [x0, x1, lsl #2]
+entry:
+  %gep = getelementptr i32, i32* %base, i64 %offset
+  %addr = bitcast i32* %gep to i8*
+  tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> %pg, i8* %addr, i32 13)
+  ret void
+}
+
+define void @test_svprfd_ss(<vscale x 2 x i1> %pg, i64* %base, i64 %offset) {
+; CHECK-LABEL: test_svprfd_ss
+; CHECK: prfd pstl3strm, p0, [x0, x1, lsl #3]
+entry:
+  %gep = getelementptr i64, i64* %base, i64 %offset
+  %addr = bitcast i64* %gep to i8*
+  tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %pg, i8* %addr, i32 13)
+  ret void
+}
+
+
+;
+; scalar + vector gather - 32-bit scaled offset
+;
+define void @test_svprfb_u32offset(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %offsets) {
+; CHECK-LABEL: test_svprfb_u32offset
+; CHECK: prfb pstl3strm, p0, [x0, z0.s, uxtw]
+entry:
+  %i64_offsets = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
+  tail call void @llvm.aarch64.sve.prfb.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i64> %i64_offsets, i32 13)
+  ret void
+}
+
+define void @test_svprfh_u32index(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %indices) {
+; CHECK-LABEL: test_svprfh_u32index
+; CHECK: prfh pstl3strm, p0, [x0, z0.s, uxtw #1]
+entry:
+  %i64_indices = zext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
+  %offsets = shl <vscale x 4 x i64> %i64_indices, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> undef, i64 1, i32 0), <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer)
+  tail call void @llvm.aarch64.sve.prfh.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i64> %offsets, i32 13)
+  ret void
+}
+
+define void @test_svprfh_u32index_mul(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %indices) {
+; CHECK-LABEL: test_svprfh_u32index_mul
+; CHECK: prfh pstl3strm, p0, [x0, z0.s, uxtw #1]
+entry:
+  %i64_indices = zext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
+  %offsets = mul <vscale x 4 x i64> %i64_indices, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> undef, i64 2, i32 0), <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer)
+  tail call void @llvm.aarch64.sve.prfh.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i64> %offsets, i32 13)
+  ret void
+}
+
+define void @test_svprfw_u32index(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %indices) {
+; CHECK-LABEL: test_svprfw_u32index
+; CHECK: prfw pstl3strm, p0, [x0, z0.s, uxtw #2]
+entry:
+  %i64_indices = zext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
+  %offsets = shl <vscale x 4 x i64> %i64_indices, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> undef, i64 2, i32 0), <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer)
+  tail call void @llvm.aarch64.sve.prfw.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i64> %offsets, i32 13)
+  ret void
+}
+
+define void @test_svprfw_u32index_mul(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %indices) {
+; CHECK-LABEL: test_svprfw_u32index_mul
+; CHECK: prfw pstl3strm, p0, [x0, z0.s, uxtw #2]
+entry:
+  %i64_indices = zext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
+  %offsets = mul <vscale x 4 x i64> %i64_indices, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> undef, i64 4, i32 0), <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer)
+  tail call void @llvm.aarch64.sve.prfw.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i64> %offsets, i32 13)
+  ret void
+}
+
+define void @test_svprfd_u32index(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %indices) {
+; CHECK-LABEL: test_svprfd_u32index
+; CHECK: prfd pstl3strm, p0, [x0, z0.s, uxtw #3]
+entry:
+  %i64_indices = zext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
+  %offsets = shl <vscale x 4 x i64> %i64_indices, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> undef, i64 3, i32 0), <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer)
+  tail call void @llvm.aarch64.sve.prfd.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i64> %offsets, i32 13)
+  ret void
+}
+
+define void @test_svprfd_u32index_mul(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %indices) {
+; CHECK-LABEL: test_svprfd_u32index_mul
+; CHECK: prfd pstl3strm, p0, [x0, z0.s, uxtw #3]
+entry:
+  %i64_indices = zext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
+  %offsets = mul <vscale x 4 x i64> %i64_indices, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> undef, i64 8, i32 0), <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer)
+  tail call void @llvm.aarch64.sve.prfd.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i64> %offsets, i32 13)
+  ret void
+}
+
+;
+; scalar + vector gather - 32-bit unpacked scaled offset
+;
+define void @test_svprfb_u32offset_unpacked(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %offsets) {
+; CHECK-LABEL: test_svprfb_u32offset_unpacked
+; CHECK: prfb pstl3strm, p0, [x0, z0.d, uxtw]
+entry:
+  %i64_offsets = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
+  tail call void @llvm.aarch64.sve.prfb.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %i64_offsets, i32 13)
+  ret void
+}
+
+define void @test_svprfh_u32index_unpacked(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %indices) {
+; CHECK-LABEL: test_svprfh_u32index_unpacked
+; CHECK: prfh pstl3strm, p0, [x0, z0.d, uxtw #1]
+entry:
+  %i64_indices = zext <vscale x 2 x i32> %indices to <vscale x 2 x i64>
+  %offsets = shl <vscale x 2 x i64> %i64_indices, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
+  tail call void @llvm.aarch64.sve.prfh.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %offsets, i32 13)
+  ret void
+}
+
+define void @test_svprfh_u32index_unpacked_mul(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %indices) {
+; CHECK-LABEL: test_svprfh_u32index_unpacked_mul
+; CHECK: prfh pstl3strm, p0, [x0, z0.d, uxtw #1]
+entry:
+  %i64_indices = zext <vscale x 2 x i32> %indices to <vscale x 2 x i64>
+  %offsets = mul <vscale x 2 x i64> %i64_indices, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 2, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
+  tail call void @llvm.aarch64.sve.prfh.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %offsets, i32 13)
+  ret void
+}
+
+define void @test_svprfw_u32index_unpacked(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %indices) {
+; CHECK-LABEL: test_svprfw_u32index_unpacked
+; CHECK: prfw pstl3strm, p0, [x0, z0.d, uxtw #2]
+entry:
+  %i64_indices = zext <vscale x 2 x i32> %indices to <vscale x 2 x i64>
+  %offsets = shl <vscale x 2 x i64> %i64_indices, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 2, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
+  tail call void @llvm.aarch64.sve.prfw.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %offsets, i32 13)
+  ret void
+}
+
+define void @test_svprfw_u32index_unpacked_mul(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %indices) {
+; CHECK-LABEL: test_svprfw_u32index_unpacked_mul
+; CHECK: prfw pstl3strm, p0, [x0, z0.d, uxtw #2]
+entry:
+  %i64_indices = zext <vscale x 2 x i32> %indices to <vscale x 2 x i64>
+  %offsets = mul <vscale x 2 x i64> %i64_indices, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 4, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
+  tail call void @llvm.aarch64.sve.prfw.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %offsets, i32 13)
+  ret void
+}
+
+define void @test_svprfd_u32index_unpacked(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %indices) {
+; CHECK-LABEL: test_svprfd_u32index_unpacked
+; CHECK: prfd pstl3strm, p0, [x0, z0.d, uxtw #3]
+entry:
+  %i64_indices = zext <vscale x 2 x i32> %indices to <vscale x 2 x i64>
+  %offsets = shl <vscale x 2 x i64> %i64_indices, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 3, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
+  tail call void @llvm.aarch64.sve.prfd.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %offsets, i32 13)
+  ret void
+}
+
+define void @test_svprfd_u32index_unpacked_mul(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %indices) {
+; CHECK-LABEL: test_svprfd_u32index_unpacked_mul
+; CHECK: prfd pstl3strm, p0, [x0, z0.d, uxtw #3]
+entry:
+  %i64_indices = zext <vscale x 2 x i32> %indices to <vscale x 2 x i64>
+  %offsets = mul <vscale x 2 x i64> %i64_indices, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 8, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
+  tail call void @llvm.aarch64.sve.prfd.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %offsets, i32 13)
+  ret void
+}
+
+;
+; scalar + vector gather - 64-bit scaled offset
+;
+define void @test_svprfb_u64offset(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %offsets) {
+; CHECK-LABEL: test_svprfb_u64offset
+; CHECK: prfb pstl3strm, p0, [x0, z0.d]
+entry:
+  tail call void @llvm.aarch64.sve.prfb.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %offsets, i32 13)
+  ret void
+}
+
+define void @test_svprfh_u64offset(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %indices) {
+; CHECK-LABEL: test_svprfh_u64offset
+; CHECK: prfh pstl3strm, p0, [x0, z0.d, lsl #1]
+entry:
+  %offsets = shl <vscale x 2 x i64> %indices, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
+  tail call void @llvm.aarch64.sve.prfh.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %offsets, i32 13)
+  ret void
+}
+
+define void @test_svprfh_u64offset_mul(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %indices) {
+; CHECK-LABEL: test_svprfh_u64offset_mul
+; CHECK: prfh pstl3strm, p0, [x0, z0.d, lsl #1]
+entry:
+  %offsets = mul <vscale x 2 x i64> %indices, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 2, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
+  tail call void @llvm.aarch64.sve.prfh.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %offsets, i32 13)
+  ret void
+}
+
+define void @test_svprfw_u64offset(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %indices) {
+; CHECK-LABEL: test_svprfw_u64offset
+; CHECK: prfw pstl3strm, p0, [x0, z0.d, lsl #2]
+entry:
+  %offsets = shl <vscale x 2 x i64> %indices, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 2, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
+  tail call void @llvm.aarch64.sve.prfw.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %offsets, i32 13)
+  ret void
+}
+
+define void @test_svprfw_u64offset_mul(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %indices) {
+; CHECK-LABEL: test_svprfw_u64offset_mul
+; CHECK: prfw pstl3strm, p0, [x0, z0.d, lsl #2]
+entry:
+  %offsets = mul <vscale x 2 x i64> %indices, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 4, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
+  tail call void @llvm.aarch64.sve.prfw.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %offsets, i32 13)
+  ret void
+}
+
+define void @test_svprfd_u64offset(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %indices) {
+; CHECK-LABEL: test_svprfd_u64offset
+; CHECK: prfd pstl3strm, p0, [x0, z0.d, lsl #3]
+entry:
+  %offsets = shl <vscale x 2 x i64> %indices, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 3, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
+  tail call void @llvm.aarch64.sve.prfd.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %offsets, i32 13)
+  ret void
+}
+
+define void @test_svprfd_u64offset_mul(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %indices) {
+; CHECK-LABEL: test_svprfd_u64offset_mul
+; CHECK: prfd pstl3strm, p0, [x0, z0.d, lsl #3]
+entry:
+  %offsets = mul <vscale x 2 x i64> %indices, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 8, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
+  tail call void @llvm.aarch64.sve.prfd.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %offsets, i32 13)
+  ret void
+}
+
+;
+; vector plus imm gather - 32-bit element
+;
+define void @test_svprfb_u32base_vnum_under(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases)
+{
+; CHECK-LABEL: test_svprfb_u32base_vnum_under
+; CHECK: mov x[[BASE:[0-9]+]], #-1
+; CHECK: prfb pstl3strm, p0, [x[[BASE]], z0.s, uxtw]
+entry:
+  %i64_bases = zext <vscale x 4 x i32> %bases to <vscale x 4 x i64>
+  %offset = inttoptr i64 -1 to i8*
+  tail call void @llvm.aarch64.sve.prfb.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %offset, <vscale x 4 x i64> %i64_bases, i32 13)
+  ret void
+}
+
+define void @test_svprfb_u32base_vnum_min(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases)
+{
+; CHECK-LABEL: test_svprfb_u32base_vnum_min
+; CHECK: prfb pstl3strm, p0, [z0.s]
+entry:
+  %i64_bases = zext <vscale x 4 x i32> %bases to <vscale x 4 x i64>
+  tail call void @llvm.aarch64.sve.prfb.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* null, <vscale x 4 x i64> %i64_bases, i32 13)
+  ret void
+}
+
+define void @test_svprfb_u32base_vnum_max(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases)
+{
+; CHECK-LABEL: test_svprfb_u32base_vnum_max
+; CHECK: prfb pstl3strm, p0, [z0.s, #31]
+entry:
+  %i64_bases = zext <vscale x 4 x i32> %bases to <vscale x 4 x i64>
+  %offset = inttoptr i64 31 to i8*
+  tail call void @llvm.aarch64.sve.prfb.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %offset, <vscale x 4 x i64> %i64_bases, i32 13)
+  ret void
+}
+
+define void @test_svprfb_u32base_vnum_over(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases)
+{
+; CHECK-LABEL: test_svprfb_u32base_vnum_over
+; CHECK: mov w[[BASE:[0-9]+]], #32
+; CHECK: prfb pstl3strm, p0, [x[[BASE]], z0.s, uxtw]
+entry:
+  %i64_bases = zext <vscale x 4 x i32> %bases to <vscale x 4 x i64>
+  %offset = inttoptr i64 32 to i8*
+  tail call void @llvm.aarch64.sve.prfb.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %offset, <vscale x 4 x i64> %i64_bases, i32 13)
+  ret void
+}
+
+define void @test_svprfh_u32base_vnum_under(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases)
+{
+; CHECK-LABEL: test_svprfh_u32base_vnum_under
+; CHECK: mov x[[BASE:[0-9]+]], #-1
+; CHECK: prfb pstl3strm, p0, [x[[BASE]], z0.s, uxtw]
+entry:
+  %i64_bases = zext <vscale x 4 x i32> %bases to <vscale x 4 x i64>
+  %offset = inttoptr i64 -1 to i8*
+  tail call void @llvm.aarch64.sve.prfh.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %offset, <vscale x 4 x i64> %i64_bases, i32 13)
+  ret void
+}
+
+define void @test_svprfh_u32base_vnum_min(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases)
+{
+; CHECK-LABEL: test_svprfh_u32base_vnum_min
+; CHECK: prfh pstl3strm, p0, [z0.s]
+entry:
+  %i64_bases = zext <vscale x 4 x i32> %bases to <vscale x 4 x i64>
+  tail call void @llvm.aarch64.sve.prfh.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* null, <vscale x 4 x i64> %i64_bases, i32 13)
+  ret void
+}
+
+define void @test_svprfh_u32base_vnum_max(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases)
+{
+; CHECK-LABEL: test_svprfh_u32base_vnum_max
+; CHECK: prfh pstl3strm, p0, [z0.s, #62]
+entry:
+  %i64_bases = zext <vscale x 4 x i32> %bases to <vscale x 4 x i64>
+  %offset = inttoptr i64 62 to i8*
+  tail call void @llvm.aarch64.sve.prfh.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %offset, <vscale x 4 x i64> %i64_bases, i32 13)
+  ret void
+}
+
+define void @test_svprfh_u32base_vnum_over(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases)
+{
+; CHECK-LABEL: test_svprfh_u32base_vnum_over
+; CHECK: mov w[[BASE:[0-9]+]], #64
+; CHECK: prfb pstl3strm, p0, [x[[BASE]], z0.s, uxtw]
+entry:
+  %i64_bases = zext <vscale x 4 x i32> %bases to <vscale x 4 x i64>
+  %offset = inttoptr i64 64 to i8*
+  tail call void @llvm.aarch64.sve.prfh.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %offset, <vscale x 4 x i64> %i64_bases, i32 13)
+  ret void
+}
+
+define void @test_svprfw_u32base_vnum_under(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases)
+{
+; CHECK-LABEL: test_svprfw_u32base_vnum_under
+; CHECK: mov x[[BASE:[0-9]+]], #-1
+; CHECK: prfb pstl3strm, p0, [x[[BASE]], z0.s, uxtw]
+entry:
+  %i64_bases = zext <vscale x 4 x i32> %bases to <vscale x 4 x i64>
+  %offset = inttoptr i64 -1 to i8*
+  tail call void @llvm.aarch64.sve.prfw.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %offset, <vscale x 4 x i64> %i64_bases, i32 13)
+  ret void
+}
+
+define void @test_svprfw_u32base_vnum_min(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases)
+{
+; CHECK-LABEL: test_svprfw_u32base_vnum_min
+; CHECK: prfw pstl3strm, p0, [z0.s]
+entry:
+  %i64_bases = zext <vscale x 4 x i32> %bases to <vscale x 4 x i64>
+  tail call void @llvm.aarch64.sve.prfw.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* null, <vscale x 4 x i64> %i64_bases, i32 13)
+  ret void
+}
+
+define void @test_svprfw_u32base_vnum_max(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases)
+{
+; CHECK-LABEL: test_svprfw_u32base_vnum_max
+; CHECK: prfw pstl3strm, p0, [z0.s, #124]
+entry:
+  %i64_bases = zext <vscale x 4 x i32> %bases to <vscale x 4 x i64>
+  %offset = inttoptr i64 124 to i8*
+  tail call void @llvm.aarch64.sve.prfw.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %offset, <vscale x 4 x i64> %i64_bases, i32 13)
+  ret void
+}
+define void @test_svprfw_u32base_vnum_over(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases)
+{
+; CHECK-LABEL: test_svprfw_u32base_vnum_over
+; CHECK: mov w[[BASE:[0-9]+]], #128
+; CHECK: prfb pstl3strm, p0, [x[[BASE]], z0.s, uxtw]
+entry:
+  %i64_bases = zext <vscale x 4 x i32> %bases to <vscale x 4 x i64>
+  %offset = inttoptr i64 128 to i8*
+  tail call void @llvm.aarch64.sve.prfw.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %offset, <vscale x 4 x i64> %i64_bases, i32 13)
+  ret void
+}
+
+define void @test_svprfd_u32base_vnum_under(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases)
+{
+; CHECK-LABEL: test_svprfd_u32base_vnum_under
+; CHECK: mov x[[BASE:[0-9]+]], #-1
+; CHECK: prfb pstl3strm, p0, [x[[BASE]], z0.s, uxtw]
+entry:
+  %i64_bases = zext <vscale x 4 x i32> %bases to <vscale x 4 x i64>
+  %offset = inttoptr i64 -1 to i8*
+  tail call void @llvm.aarch64.sve.prfd.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %offset, <vscale x 4 x i64> %i64_bases, i32 13)
+  ret void
+}
+
+define void @test_svprfd_u32base_vnum_min(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases)
+{
+; CHECK-LABEL: test_svprfd_u32base_vnum_min
+; CHECK: prfd pstl3strm, p0, [z0.s]
+entry:
+  %i64_bases = zext <vscale x 4 x i32> %bases to <vscale x 4 x i64>
+  tail call void @llvm.aarch64.sve.prfd.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* null, <vscale x 4 x i64> %i64_bases, i32 13)
+  ret void
+}
+
+define void @test_svprfd_u32base_vnum_max(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases)
+{
+; CHECK-LABEL: test_svprfd_u32base_vnum_max
+; CHECK: prfd pstl3strm, p0, [z0.s, #248]
+entry:
+  %i64_bases = zext <vscale x 4 x i32> %bases to <vscale x 4 x i64>
+  %offset = inttoptr i64 248 to i8*
+  tail call void @llvm.aarch64.sve.prfd.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %offset, <vscale x 4 x i64> %i64_bases, i32 13)
+  ret void
+}
+
+define void @test_svprfd_u32base_vnum_over(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases)
+{
+; CHECK-LABEL: test_svprfd_u32base_vnum_over
+; CHECK: mov w[[BASE:[0-9]+]], #256
+; CHECK: prfb pstl3strm, p0, [x[[BASE]], z0.s, uxtw]
+entry:
+  %i64_bases = zext <vscale x 4 x i32> %bases to <vscale x 4 x i64>
+  %offset = inttoptr i64 256 to i8*
+  tail call void @llvm.aarch64.sve.prfd.gather.nxv4i1(<vscale x 4 x i1> %pg, i8* %offset, <vscale x 4 x i64> %i64_bases, i32 13)
+  ret void
+}
+
+;
+; vector plus imm gather - 64-bit element
+;
+define void @test_svprfb_u64base_vnum_under(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %bases)
+{
+; CHECK-LABEL: test_svprfb_u64base_vnum_under
+; CHECK: mov x[[BASE:[0-9]+]], #-1
+; CHECK: prfb pstl3strm, p0, [x[[BASE]], z0.d]
+entry:
+  %offset = inttoptr i64 -1 to i8*
+  tail call void @llvm.aarch64.sve.prfb.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %offset, <vscale x 2 x i64> %bases, i32 13)
+  ret void
+}
+
+define void @test_svprfb_u64base_vnum_min(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %bases)
+{
+; CHECK-LABEL: test_svprfb_u64base_vnum_min
+; CHECK: prfb pstl3strm, p0, [z0.d]
+entry:
+  tail call void @llvm.aarch64.sve.prfb.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* null, <vscale x 2 x i64> %bases, i32 13)
+  ret void
+}
+
+define void @test_svprfb_u64base_vnum_max(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %bases)
+{
+; CHECK-LABEL: test_svprfb_u64base_vnum_max
+; CHECK: prfb pstl3strm, p0, [z0.d, #31]
+entry:
+  %offset = inttoptr i64 31 to i8*
+  tail call void @llvm.aarch64.sve.prfb.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %offset, <vscale x 2 x i64> %bases, i32 13)
+  ret void
+}
+
+define void @test_svprfb_u64base_vnum_over(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %bases)
+{
+; CHECK-LABEL: test_svprfb_u64base_vnum_over
+; CHECK: mov w[[BASE:[0-9]+]], #32
+; CHECK: prfb pstl3strm, p0, [x[[BASE]], z0.d]
+entry:
+  %offset = inttoptr i64 32 to i8*
+  tail call void @llvm.aarch64.sve.prfb.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %offset, <vscale x 2 x i64> %bases, i32 13)
+  ret void
+}
+
+define void @test_svprfh_u64base_vnum_under(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %bases)
+{
+; CHECK-LABEL: test_svprfh_u64base_vnum_under
+; CHECK: mov z[[SPLAT:[0-9]+]].d, #-1
+; CHECK: add z[[ADDR:[0-9]+]].d, z0.d, z[[SPLAT]].d
+; CHECK: prfh pstl3strm, p0, [z[[ADDR]].d]
+entry:
+  %offset = inttoptr i64 -1 to i8*
+  tail call void @llvm.aarch64.sve.prfh.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %offset, <vscale x 2 x i64> %bases, i32 13)
+  ret void
+}
+
+define void @test_svprfh_u64base_vnum_min(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %bases)
+{
+; CHECK-LABEL: test_svprfh_u64base_vnum_min
+; CHECK: prfh pstl3strm, p0, [z0.d]
+entry:
+  tail call void @llvm.aarch64.sve.prfh.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* null, <vscale x 2 x i64> %bases, i32 13)
+  ret void
+}
+
+define void @test_svprfh_u64base_vnum_max(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %bases)
+{
+; CHECK-LABEL: test_svprfh_u64base_vnum_max
+; CHECK: prfh pstl3strm, p0, [z0.d, #62]
+entry:
+  %offset = inttoptr i64 62 to i8*
+  tail call void @llvm.aarch64.sve.prfh.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %offset, <vscale x 2 x i64> %bases, i32 13)
+  ret void
+}
+
+define void @test_svprfh_u64base_vnum_over(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %bases)
+{
+; CHECK-LABEL: test_svprfh_u64base_vnum_over
+; CHECK: add z[[ADDR:[0-9]+]].d, z0.d, #64
+; CHECK: prfh pstl3strm, p0, [z[[ADDR]].d]
+entry:
+  %offset = inttoptr i64 64 to i8*
+  tail call void @llvm.aarch64.sve.prfh.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %offset, <vscale x 2 x i64> %bases, i32 13)
+  ret void
+}
+
+define void @test_svprfw_u64base_vnum_under(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %bases)
+{
+; CHECK-LABEL: test_svprfw_u64base_vnum_under
+; CHECK: mov z[[SPLAT:[0-9]+]].d, #-1
+; CHECK: add z[[ADDR:[0-9]+]].d, z0.d, z[[SPLAT]].d
+; CHECK: prfw pstl3strm, p0, [z[[ADDR]].d]
+entry:
+  %offset = inttoptr i64 -1 to i8*
+  tail call void @llvm.aarch64.sve.prfw.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %offset, <vscale x 2 x i64> %bases, i32 13)
+  ret void
+}
+
+define void @test_svprfw_u64base_vnum_min(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %bases)
+{
+; CHECK-LABEL: test_svprfw_u64base_vnum_min
+; CHECK: prfw pstl3strm, p0, [z0.d]
+entry:
+  tail call void @llvm.aarch64.sve.prfw.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* null, <vscale x 2 x i64> %bases, i32 13)
+  ret void
+}
+
+define void @test_svprfw_u64base_vnum_max(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %bases)
+{
+; CHECK-LABEL: test_svprfw_u64base_vnum_max
+; CHECK: prfw pstl3strm, p0, [z0.d, #124]
+entry:
+  %offset = inttoptr i64 124 to i8*
+  tail call void @llvm.aarch64.sve.prfw.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %offset, <vscale x 2 x i64> %bases, i32 13)
+  ret void
+}
+
+define void @test_svprfw_u64base_vnum_over(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %bases)
+{
+; CHECK-LABEL: test_svprfw_u64base_vnum_over
+; CHECK: add z[[ADDR:[0-9]+]].d, z0.d, #128
+; CHECK: prfw pstl3strm, p0, [z[[ADDR]].d]
+entry:
+  %offset = inttoptr i64 128 to i8*
+  tail call void @llvm.aarch64.sve.prfw.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %offset, <vscale x 2 x i64> %bases, i32 13)
+  ret void
+}
+
+define void @test_svprfd_u64base_vnum_under(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %bases)
+{
+; CHECK-LABEL: test_svprfd_u64base_vnum_under
+; CHECK: mov z[[SPLAT:[0-9]+]].d, #-1
+; CHECK: add z[[ADDR:[0-9]+]].d, z0.d, z[[SPLAT]].d
+; CHECK: prfd pstl3strm, p0, [z[[ADDR]].d]
+entry:
+  %offset = inttoptr i64 -1 to i8*
+  tail call void @llvm.aarch64.sve.prfd.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %offset, <vscale x 2 x i64> %bases, i32 13)
+  ret void
+}
+
+define void @test_svprfd_u64base_vnum_min(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %bases)
+{
+; CHECK-LABEL: test_svprfd_u64base_vnum_min
+; CHECK: prfd pstl3strm, p0, [z0.d]
+entry:
+  tail call void @llvm.aarch64.sve.prfd.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* null, <vscale x 2 x i64> %bases, i32 13)
+  ret void
+}
+
+define void @test_svprfd_u64base_vnum_max(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %bases)
+{
+; CHECK-LABEL: test_svprfd_u64base_vnum_max
+; CHECK: prfd pstl3strm, p0, [z0.d, #248]
+entry:
+  %offset = inttoptr i64 248 to i8*
+  tail call void @llvm.aarch64.sve.prfd.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %offset, <vscale x 2 x i64> %bases, i32 13)
+  ret void
+}
+
+define void @test_svprfd_u64base_vnum_over(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %bases)
+{
+; CHECK-LABEL: test_svprfd_u64base_vnum_over
+; CHECK: add z[[ADDR:[0-9]+]].d, z0.d, #256
+; CHECK: prfd pstl3strm, p0, [z[[ADDR]].d]
+entry:
+  %offset = inttoptr i64 256 to i8*
+  tail call void @llvm.aarch64.sve.prfd.gather.nxv2i1(<vscale x 2 x i1> %pg, i8* %offset, <vscale x 2 x i64> %bases, i32 13)
+  ret void
+}
+
+
+
+declare void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1>, i8*, i32)
+declare void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1>, i8*, i32)
+declare void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1>, i8*, i32)
+declare void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1>, i8*, i32)
+declare void @llvm.aarch64.sve.prfb.gather.nxv4i1(<vscale x 4 x i1>, i8*, <vscale x 4 x i64>, i32)
+declare void @llvm.aarch64.sve.prfh.gather.nxv4i1(<vscale x 4 x i1>, i8*, <vscale x 4 x i64>, i32)
+declare void @llvm.aarch64.sve.prfw.gather.nxv4i1(<vscale x 4 x i1>, i8*, <vscale x 4 x i64>, i32)
+declare void @llvm.aarch64.sve.prfd.gather.nxv4i1(<vscale x 4 x i1>, i8*, <vscale x 4 x i64>, i32)
+declare void @llvm.aarch64.sve.prfb.gather.nxv2i1(<vscale x 2 x i1>, i8*, <vscale x 2 x i64>, i32)
+declare void @llvm.aarch64.sve.prfh.gather.nxv2i1(<vscale x 2 x i1>, i8*, <vscale x 2 x i64>, i32)
+declare void @llvm.aarch64.sve.prfw.gather.nxv2i1(<vscale x 2 x i1>, i8*, <vscale x 2 x i64>, i32)
+declare void @llvm.aarch64.sve.prfd.gather.nxv2i1(<vscale x 2 x i1>, i8*, <vscale x 2 x i64>, i32)