diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -1254,6 +1254,37 @@ ], [IntrWriteMem, IntrArgMemOnly]>; + +class SVE_prf_scaled + : Intrinsic<[], + [ + llvm_anyptr_ty, // Base address + llvm_anyvector_ty, // offsets + LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>, // Predicate + llvm_i32_ty // prfop + ], + [IntrArgMemOnly, NoCapture<0>, ImmArg<3>]>; + +def int_aarch64_sve_gather_prf_scaled_uxtw : SVE_prf_scaled; +def int_aarch64_sve_gather_prf_scaled_sxtw : SVE_prf_scaled; +def int_aarch64_sve_gather_prf_scaled : SVE_prf_scaled; + +class SVE_gather_prf + : Intrinsic<[], + [ + llvm_anyvector_ty, // Base address + llvm_i64_ty, // immediate offset + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, // Predicate + llvm_i32_ty // prfop + ], + [IntrInaccessibleMemOrArgMemOnly, ImmArg<1>, ImmArg<3>]>; + +def int_aarch64_sve_gather_prfb : SVE_gather_prf; +def int_aarch64_sve_gather_prfh : SVE_gather_prf; +def int_aarch64_sve_gather_prfw : SVE_gather_prf; +def int_aarch64_sve_gather_prfd : SVE_gather_prf; + + // // Loads // diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8887,6 +8887,20 @@ unsigned Intrinsic) const { auto &DL = I.getModule()->getDataLayout(); switch (Intrinsic) { + case Intrinsic::aarch64_sve_gather_prf_scaled: + case Intrinsic::aarch64_sve_gather_prf_scaled_sxtw: + case Intrinsic::aarch64_sve_gather_prf_scaled_uxtw: { + ElementCount EC = I.getArgOperand(1)->getType()->getVectorElementCount(); + Info.opc = ISD::INTRINSIC_VOID; + Type *PointeeTy = I.getArgOperand(0)->getType()->getPointerElementType(); + Info.memVT = + EVT::getVectorVT(I.getType()->getContext(), EVT::getEVT(PointeeTy), EC); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.align.reset(); + Info.flags = MachineMemOperand::MOLoad; + return true; + } case Intrinsic::aarch64_neon_ld2: case Intrinsic::aarch64_neon_ld3: case Intrinsic::aarch64_neon_ld4: diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -10,6 +10,94 @@ // //===----------------------------------------------------------------------===// +class gather_prf_scaled_pat_frag + : PatFrag<(ops node:$base, node:$vec_offset, node:$pred, node:$prfop), + (intrinsic node:$base, node:$vec_offset, node:$pred, node:$prfop), + predicate>; + + def gather_prfb_scaled_uxtw + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv4i8; }]>; + + def gather_prfb_scaled_sxtw + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv4i8; }]>; + + def gather_prfb_scaled_unpacked_uxtw + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv2i8; }]>; + + def gather_prfb_scaled_unpacked_sxtw + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv2i8; }]>; + + def gather_prfb_scaled + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv2i8; }]>; + + def gather_prfh_scaled_uxtw + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv4i16 || cast(N)->getMemoryVT() == MVT::nxv4f16; }]>; + + def gather_prfh_scaled_sxtw + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv4i16 || cast(N)->getMemoryVT() == MVT::nxv4f16; }]>; + + def gather_prfh_scaled_unpacked_uxtw + : 
gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv2i16 || cast(N)->getMemoryVT() == MVT::nxv2f16; }]>; + + def gather_prfh_scaled_unpacked_sxtw + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv2i16 || cast(N)->getMemoryVT() == MVT::nxv2f16; }]>; + + def gather_prfh_scaled + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv2i16 || cast(N)->getMemoryVT() == MVT::nxv2f16; }]>; + + + def gather_prfw_scaled_uxtw + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv4i32 || cast(N)->getMemoryVT() == MVT::nxv4f32; }]>; + + def gather_prfw_scaled_sxtw + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv4i32 || cast(N)->getMemoryVT() == MVT::nxv4f32; }]>; + + def gather_prfw_scaled_unpacked_uxtw + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv2i32 || cast(N)->getMemoryVT() == MVT::nxv2f32; }]>; + + def gather_prfw_scaled_unpacked_sxtw + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv2i32 || cast(N)->getMemoryVT() == MVT::nxv2f32; }]>; + + def gather_prfw_scaled + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv2i32 || cast(N)->getMemoryVT() == MVT::nxv2f32; }]>; + + + def gather_prfd_scaled_uxtw + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv4i64 || cast(N)->getMemoryVT() == MVT::nxv4f64; }]>; + + def gather_prfd_scaled_sxtw + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv4i64 || cast(N)->getMemoryVT() == MVT::nxv4f64; }]>; + + def gather_prfd_scaled_unpacked_uxtw + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv2i64 || cast(N)->getMemoryVT() == MVT::nxv2f64; }]>; + + def gather_prfd_scaled_unpacked_sxtw + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv2i64 || cast(N)->getMemoryVT() == MVT::nxv2f64; }]>; + + def gather_prfd_scaled + : gather_prf_scaled_pat_frag(N)->getMemoryVT() == MVT::nxv2i64 || cast(N)->getMemoryVT() == MVT::nxv2f64; }]>; + + def SVE8BitLslImm : ComplexPattern; // Non-faulting loads - node definitions @@ -880,37 +968,37 @@ // Gather prefetch using scaled 32-bit offsets, e.g. // prfh pldl1keep, p0, [x0, z0.s, uxtw #1] - defm PRFB_S : sve_mem_32b_prfm_sv_scaled<0b00, "prfb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>; - defm PRFH_S : sve_mem_32b_prfm_sv_scaled<0b01, "prfh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>; - defm PRFW_S : sve_mem_32b_prfm_sv_scaled<0b10, "prfw", ZPR32ExtSXTW32, ZPR32ExtUXTW32>; - defm PRFD_S : sve_mem_32b_prfm_sv_scaled<0b11, "prfd", ZPR32ExtSXTW64, ZPR32ExtUXTW64>; + defm PRFB_S : sve_mem_32b_prfm_sv_scaled<0b00, "prfb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, gather_prfb_scaled_sxtw, gather_prfb_scaled_uxtw>; + defm PRFH_S : sve_mem_32b_prfm_sv_scaled<0b01, "prfh", ZPR32ExtSXTW16, ZPR32ExtUXTW16, gather_prfh_scaled_sxtw, gather_prfh_scaled_uxtw>; + defm PRFW_S : sve_mem_32b_prfm_sv_scaled<0b10, "prfw", ZPR32ExtSXTW32, ZPR32ExtUXTW32, gather_prfw_scaled_sxtw, gather_prfw_scaled_uxtw>; + defm PRFD_S : sve_mem_32b_prfm_sv_scaled<0b11, "prfd", ZPR32ExtSXTW64, ZPR32ExtUXTW64, gather_prfd_scaled_sxtw, gather_prfd_scaled_uxtw>; // Gather prefetch using unpacked, scaled 32-bit offsets, e.g. 
// prfh pldl1keep, p0, [x0, z0.d, uxtw #1] - defm PRFB_D : sve_mem_64b_prfm_sv_ext_scaled<0b00, "prfb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>; - defm PRFH_D : sve_mem_64b_prfm_sv_ext_scaled<0b01, "prfh", ZPR64ExtSXTW16, ZPR64ExtUXTW16>; - defm PRFW_D : sve_mem_64b_prfm_sv_ext_scaled<0b10, "prfw", ZPR64ExtSXTW32, ZPR64ExtUXTW32>; - defm PRFD_D : sve_mem_64b_prfm_sv_ext_scaled<0b11, "prfd", ZPR64ExtSXTW64, ZPR64ExtUXTW64>; + defm PRFB_D : sve_mem_64b_prfm_sv_ext_scaled<0b00, "prfb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, gather_prfb_scaled_unpacked_sxtw, gather_prfb_scaled_unpacked_uxtw>; + defm PRFH_D : sve_mem_64b_prfm_sv_ext_scaled<0b01, "prfh", ZPR64ExtSXTW16, ZPR64ExtUXTW16, gather_prfh_scaled_unpacked_sxtw, gather_prfh_scaled_unpacked_uxtw>; + defm PRFW_D : sve_mem_64b_prfm_sv_ext_scaled<0b10, "prfw", ZPR64ExtSXTW32, ZPR64ExtUXTW32, gather_prfw_scaled_unpacked_sxtw, gather_prfw_scaled_unpacked_uxtw>; + defm PRFD_D : sve_mem_64b_prfm_sv_ext_scaled<0b11, "prfd", ZPR64ExtSXTW64, ZPR64ExtUXTW64, gather_prfd_scaled_unpacked_sxtw, gather_prfd_scaled_unpacked_uxtw>; // Gather prefetch using scaled 64-bit offsets, e.g. // prfh pldl1keep, p0, [x0, z0.d, lsl #1] - defm PRFB_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b00, "prfb", ZPR64ExtLSL8>; - defm PRFH_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b01, "prfh", ZPR64ExtLSL16>; - defm PRFW_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b10, "prfw", ZPR64ExtLSL32>; - defm PRFD_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b11, "prfd", ZPR64ExtLSL64>; + defm PRFB_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b00, "prfb", ZPR64ExtLSL8, gather_prfb_scaled>; + defm PRFH_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b01, "prfh", ZPR64ExtLSL16, gather_prfh_scaled>; + defm PRFW_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b10, "prfw", ZPR64ExtLSL32, gather_prfw_scaled>; + defm PRFD_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b11, "prfd", ZPR64ExtLSL64, gather_prfd_scaled>; // Gather prefetch using 32/64-bit pointers with offset, e.g. 
// prfh pldl1keep, p0, [z0.s, #16] // prfh pldl1keep, p0, [z0.d, #16] - defm PRFB_S_PZI : sve_mem_32b_prfm_vi<0b00, "prfb", imm0_31>; - defm PRFH_S_PZI : sve_mem_32b_prfm_vi<0b01, "prfh", uimm5s2>; - defm PRFW_S_PZI : sve_mem_32b_prfm_vi<0b10, "prfw", uimm5s4>; - defm PRFD_S_PZI : sve_mem_32b_prfm_vi<0b11, "prfd", uimm5s8>; - - defm PRFB_D_PZI : sve_mem_64b_prfm_vi<0b00, "prfb", imm0_31>; - defm PRFH_D_PZI : sve_mem_64b_prfm_vi<0b01, "prfh", uimm5s2>; - defm PRFW_D_PZI : sve_mem_64b_prfm_vi<0b10, "prfw", uimm5s4>; - defm PRFD_D_PZI : sve_mem_64b_prfm_vi<0b11, "prfd", uimm5s8>; + defm PRFB_S_PZI : sve_mem_32b_prfm_vi<0b00, "prfb", timm0_31, int_aarch64_sve_gather_prfb>; + defm PRFH_S_PZI : sve_mem_32b_prfm_vi<0b01, "prfh", tuimm5s2, int_aarch64_sve_gather_prfh>; + defm PRFW_S_PZI : sve_mem_32b_prfm_vi<0b10, "prfw", tuimm5s4, int_aarch64_sve_gather_prfw>; + defm PRFD_S_PZI : sve_mem_32b_prfm_vi<0b11, "prfd", tuimm5s8, int_aarch64_sve_gather_prfd>; + + defm PRFB_D_PZI : sve_mem_64b_prfm_vi<0b00, "prfb", timm0_31, int_aarch64_sve_gather_prfb>; + defm PRFH_D_PZI : sve_mem_64b_prfm_vi<0b01, "prfh", tuimm5s2, int_aarch64_sve_gather_prfh>; + defm PRFW_D_PZI : sve_mem_64b_prfm_vi<0b10, "prfw", tuimm5s4, int_aarch64_sve_gather_prfw>; + defm PRFD_D_PZI : sve_mem_64b_prfm_vi<0b11, "prfd", tuimm5s8, int_aarch64_sve_gather_prfd>; defm ADR_SXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_sxtw<0b00, "adr">; defm ADR_UXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_uxtw<0b01, "adr">; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -6441,9 +6441,17 @@ multiclass sve_mem_32b_prfm_sv_scaled msz, string asm, RegisterOperand sxtw_opnd, - RegisterOperand uxtw_opnd> { + RegisterOperand uxtw_opnd, + PatFrag op_sxtw, + PatFrag op_uxtw> { def _UXTW_SCALED : sve_mem_32b_prfm_sv; def _SXTW_SCALED : sve_mem_32b_prfm_sv; + def : Pat<(op_uxtw (i64 GPR64sp:$Rn), (nxv4i32 uxtw_opnd:$Zm), (nxv4i1 PPR3bAny:$Pg), (i32 sve_prfop:$prfop)), + (!cast(NAME # _UXTW_SCALED) sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>; + + def : Pat<(op_sxtw (i64 GPR64sp:$Rn), (nxv4i32 sxtw_opnd:$Zm), (nxv4i1 PPR3bAny:$Pg), (i32 sve_prfop:$prfop)), + (!cast(NAME # _SXTW_SCALED) sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>; + } class sve_mem_32b_prfm_vi msz, string asm, Operand imm_ty> @@ -6466,9 +6474,12 @@ let Inst{3-0} = prfop; } -multiclass sve_mem_32b_prfm_vi msz, string asm, Operand imm_ty> { +multiclass sve_mem_32b_prfm_vi msz, string asm, Operand imm_ty, SDPatternOperator op> { def NAME : sve_mem_32b_prfm_vi; + def : Pat<(op (nxv4i32 ZPR32:$Zn), (i64 imm_ty:$imm), (nxv4i1 PPR_3b:$Pg), (i32 sve_prfop:$prfop)), + (!cast(NAME) sve_prfop:$prfop, PPR_3b:$Pg, ZPR32:$Zn, imm_ty:$imm)>; + def : InstAlias(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>; } @@ -6784,14 +6795,27 @@ multiclass sve_mem_64b_prfm_sv_ext_scaled msz, string asm, RegisterOperand sxtw_opnd, - RegisterOperand uxtw_opnd> { + RegisterOperand uxtw_opnd, + PatFrag op_sxtw, + PatFrag op_uxtw> { def _UXTW_SCALED : sve_mem_64b_prfm_sv; def _SXTW_SCALED : sve_mem_64b_prfm_sv; + + def : Pat<(op_uxtw (i64 GPR64sp:$Rn), (nxv2i64 uxtw_opnd:$Zm), (nxv2i1 PPR3bAny:$Pg), (i32 sve_prfop:$prfop)), + (!cast(NAME # _UXTW_SCALED) sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>; + + def : Pat<(op_sxtw (i64 GPR64sp:$Rn), (nxv2i64 sxtw_opnd:$Zm), (nxv2i1 PPR3bAny:$Pg), (i32 sve_prfop:$prfop)), + (!cast(NAME # 
_SXTW_SCALED) sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>; + } multiclass sve_mem_64b_prfm_sv_lsl_scaled msz, string asm, - RegisterOperand zprext> { + RegisterOperand zprext, PatFrag frag> { def NAME : sve_mem_64b_prfm_sv; + + def : Pat<(frag (i64 GPR64sp:$Rn), (nxv2i64 zprext:$Zm), (nxv2i1 PPR3bAny:$Pg), (i32 sve_prfop:$prfop)), + (!cast(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm)>; + } @@ -6817,9 +6841,12 @@ let hasSideEffects = 1; } -multiclass sve_mem_64b_prfm_vi msz, string asm, Operand imm_ty> { +multiclass sve_mem_64b_prfm_vi msz, string asm, Operand imm_ty, SDPatternOperator op> { def NAME : sve_mem_64b_prfm_vi; + def : Pat<(op (nxv2i64 ZPR32:$Zn), (i64 imm_ty:$imm), (nxv2i1 PPR_3b:$Pg), (i32 sve_prfop:$prfop)), + (!cast(NAME) sve_prfop:$prfop, PPR_3b:$Pg, ZPR32:$Zn, imm_ty:$imm)>; + def : InstAlias(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>; } diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-scaled-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-scaled-offset.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-scaled-offset.ll @@ -0,0 +1,360 @@ +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s | FileCheck %s + +; PRFB , , [, .S, ] -> 32-bit scaled offset +declare void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0i8.nx4vi32(i8* %base, %offset, %Pg, i32 %prfop) + +define void @llvm_aarch64_sve_gather_prf_scaled_uxtw_p0i8_nx4vi32(i8* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_uxtw_p0i8_nx4vi32: +; CHECK: prfb pldl1strm, p0, [x0, z0.s, uxtw] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0i8.nx4vi32(i8* %base, %offset, %Pg, i32 1) + ret void + } + + + +declare void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0i8.nx4vi32(i8* %base, %offset, %Pg, i32 %prfop) + +define void @llvm_aarch64_sve_gather_prf_scaled_sxtw_p0i8_nx4vi32(i8* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_sxtw_p0i8_nx4vi32: +; CHECK: prfb pldl1strm, p0, [x0, z0.s, sxtw] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0i8.nx4vi32(i8* %base, %offset, %Pg, i32 1) + ret void + } + + +; PRFB , , [, .D, ] -> 32-bit unpacked scaled offset + +declare void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0i8.nx2vi64(i8* %base, %offset, %Pg, i32 %prfop) + +define void @llvm_aarch64_sve_gather_prf_scaled_uxtw_p0i8_nx2vi64(i8* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_uxtw_p0i8_nx2vi64: +; CHECK: prfb pldl1strm, p0, [x0, z0.d, uxtw] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0i8.nx2vi64(i8* %base, %offset, %Pg, i32 1) + ret void + } + + + +declare void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0i8.nx2vi64(i8* %base, %offset, %Pg, i32 %prfop) + +define void @llvm_aarch64_sve_gather_prf_scaled_sxtw_p0i8_nx2vi64(i8* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_sxtw_p0i8_nx2vi64: +; CHECK: prfb pldl1strm, p0, [x0, z0.d, sxtw] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0i8.nx2vi64(i8* %base, %offset, %Pg, i32 1) + ret void + } + + +; PRFB , , [, .D] -> 64-bit scaled offset +declare void @llvm.aarch64.sve.gather.prf.scaled.p0i8.nx2vi64(i8* %base, %offset, %Pg, i32 %prfop) + +define void @llvm_aarch64_sve_gather_prf_scaled_p0i8_nx2vi64(i8* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_p0i8_nx2vi64: +; CHECK: prfb 
pldl1strm, p0, [x0, z0.d] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.p0i8.nx2vi64(i8* %base, %offset, %Pg, i32 1) + ret void + } + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; PRFH , , [, .S, ] -> 32-bit scaled offset +declare void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0i16.nx4vi32(i16* %base, %offset, %Pg, i32 %prfop) + +define void @llvm_aarch64_sve_gather_prf_scaled_uxtw_p0i16_nx4vi32(i16* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_uxtw_p0i16_nx4vi32: +; CHECK: prfh pldl1strm, p0, [x0, z0.s, uxtw #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0i16.nx4vi32(i16* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0i16.nx4vi32(i16* %base, %offset, %Pg, i32 %prfop) +define void @llvm_aarch64_sve_gather_prf_scaled_sxtw_p0i16_nx4vi32(i16* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_sxtw_p0i16_nx4vi32: +; CHECK: prfh pldl1strm, p0, [x0, z0.s, sxtw #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0i16.nx4vi32(i16* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0f16.nx4vi32(half* %base, %offset, %Pg, i32 %prfop) + +define void @llvm_aarch64_sve_gather_prf_scaled_uxtw_p0f16_nx4vi32(half* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_uxtw_p0f16_nx4vi32: +; CHECK: prfh pldl1strm, p0, [x0, z0.s, uxtw #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0f16.nx4vi32(half* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0f16.nx4vi32(half* %base, %offset, %Pg, i32 %prfop) +define void @llvm_aarch64_sve_gather_prf_scaled_sxtw_p0f16_nx4vi32(half* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_sxtw_p0f16_nx4vi32: +; CHECK: prfh pldl1strm, p0, [x0, z0.s, sxtw #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0f16.nx4vi32(half* %base, %offset, %Pg, i32 1) + ret void + } + +; PRFH , , [, .D, #1] -> 32-bit unpacked scaled offset +declare void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0i16.nx2vi64(i16* %base, %offset, %Pg, i32 %prfop) +define void @llvm_aarch64_sve_gather_prf_scaled_uxtw_p0i16_nx2vi64(i16* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_uxtw_p0i16_nx2vi64: +; CHECK: prfh pldl1strm, p0, [x0, z0.d, uxtw #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0i16.nx2vi64(i16* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0i16.nx2vi64(i16* %base, %offset, %Pg, i32 %prfop) +define void @llvm_aarch64_sve_gather_prf_scaled_sxtw_p0i16_nx2vi64(i16* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_sxtw_p0i16_nx2vi64: +; CHECK: prfh pldl1strm, p0, [x0, z0.d, sxtw #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0i16.nx2vi64(i16* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0f16.nx2vi64(half* %base, %offset, %Pg, i32 %prfop) +define void @llvm_aarch64_sve_gather_prf_scaled_uxtw_p0f16_nx2vi64(half* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_uxtw_p0f16_nx2vi64: +; CHECK: prfh pldl1strm, p0, [x0, z0.d, uxtw #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0f16.nx2vi64(half* %base, %offset, %Pg, i32 
1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0f16.nx2vi64(half* %base, %offset, %Pg, i32 %prfop) +define void @llvm_aarch64_sve_gather_prf_scaled_sxtw_p0f16_nx2vi64(half* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_sxtw_p0f16_nx2vi64: +; CHECK: prfh pldl1strm, p0, [x0, z0.d, sxtw #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0f16.nx2vi64(half* %base, %offset, %Pg, i32 1) + ret void + } + +; PRFH , , [, .D] -> 64-bit scaled offset +declare void @llvm.aarch64.sve.gather.prf.scaled.p0i16.nx2vi64(i16* %base, %offset, %Pg, i32 %prfop) + +define void @llvm_aarch64_sve_gather_prf_scaled_p0i16_nx2vi64(i16* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_p0i16_nx2vi64: +; CHECK: prfh pldl1strm, p0, [x0, z0.d, lsl #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.p0i16.nx2vi64(i16* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.p0f16.nx2vi64(half* %base, %offset, %Pg, i32 %prfop) + +define void @llvm_aarch64_sve_gather_prf_scaled_p0f16_nx2vi64(half* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_p0f16_nx2vi64: +; CHECK: prfh pldl1strm, p0, [x0, z0.d, lsl #1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.p0f16.nx2vi64(half* %base, %offset, %Pg, i32 1) + ret void + } + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; PRFW , , [, .S, ] -> 32-bit scaled offset +declare void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0i32.nx4vi32(i32* %base, %offset, %Pg, i32 %prfop) + +define void @llvm_aarch64_sve_gather_prf_scaled_uxtw_p0i32_nx4vi32(i32* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_uxtw_p0i32_nx4vi32: +; CHECK: prfw pldl1strm, p0, [x0, z0.s, uxtw #2] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0i32.nx4vi32(i32* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0i32.nx4vi32(i32* %base, %offset, %Pg, i32 %prfop) +define void @llvm_aarch64_sve_gather_prf_scaled_sxtw_p0i32_nx4vi32(i32* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_sxtw_p0i32_nx4vi32: +; CHECK: prfw pldl1strm, p0, [x0, z0.s, sxtw #2] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0i32.nx4vi32(i32* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0f32.nx4vi32(float* %base, %offset, %Pg, i32 %prfop) + +define void @llvm_aarch64_sve_gather_prf_scaled_uxtw_p0f32_nx4vi32(float* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_uxtw_p0f32_nx4vi32: +; CHECK: prfw pldl1strm, p0, [x0, z0.s, uxtw #2] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0f32.nx4vi32(float* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0f32.nx4vi32(float* %base, %offset, %Pg, i32 %prfop) +define void @llvm_aarch64_sve_gather_prf_scaled_sxtw_p0f32_nx4vi32(float* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_sxtw_p0f32_nx4vi32: +; CHECK: prfw pldl1strm, p0, [x0, z0.s, sxtw #2] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0f32.nx4vi32(float* %base, %offset, %Pg, i32 1) + ret void + } + +; PRFW , , [, .D, #2] -> 32-bit unpacked scaled offset +declare void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0i32.nx2vi64(i32* %base, %offset, %Pg, i32 
%prfop) +define void @llvm_aarch64_sve_gather_prf_scaled_uxtw_p0i32_nx2vi64(i32* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_uxtw_p0i32_nx2vi64: +; CHECK: prfw pldl1strm, p0, [x0, z0.d, uxtw #2] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0i32.nx2vi64(i32* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0i32.nx2vi64(i32* %base, %offset, %Pg, i32 %prfop) +define void @llvm_aarch64_sve_gather_prf_scaled_sxtw_p0i32_nx2vi64(i32* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_sxtw_p0i32_nx2vi64: +; CHECK: prfw pldl1strm, p0, [x0, z0.d, sxtw #2] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0i32.nx2vi64(i32* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0f32.nx2vi64(float* %base, %offset, %Pg, i32 %prfop) +define void @llvm_aarch64_sve_gather_prf_scaled_uxtw_p0f32_nx2vi64(float* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_uxtw_p0f32_nx2vi64: +; CHECK: prfw pldl1strm, p0, [x0, z0.d, uxtw #2] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0f32.nx2vi64(float* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0f32.nx2vi64(float* %base, %offset, %Pg, i32 %prfop) +define void @llvm_aarch64_sve_gather_prf_scaled_sxtw_p0f32_nx2vi64(float* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_sxtw_p0f32_nx2vi64: +; CHECK: prfw pldl1strm, p0, [x0, z0.d, sxtw #2] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0f32.nx2vi64(float* %base, %offset, %Pg, i32 1) + ret void + } + +; PRFW , , [, .D] -> 64-bit scaled offset +declare void @llvm.aarch64.sve.gather.prf.scaled.p0i32.nx2vi64(i32* %base, %offset, %Pg, i32 %prfop) + +define void @llvm_aarch64_sve_gather_prf_scaled_p0i32_nx2vi64(i32* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_p0i32_nx2vi64: +; CHECK: prfw pldl1strm, p0, [x0, z0.d, lsl #2] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.p0i32.nx2vi64(i32* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.p0f32.nx2vi64(float* %base, %offset, %Pg, i32 %prfop) + +define void @llvm_aarch64_sve_gather_prf_scaled_p0f32_nx2vi64(float* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_p0f32_nx2vi64: +; CHECK: prfw pldl1strm, p0, [x0, z0.d, lsl #2] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.p0f32.nx2vi64(float* %base, %offset, %Pg, i32 1) + ret void + } + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + + + +; PRFD , , [, .S, ] -> 32-bit scaled offset +declare void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0i64.nx4vi32(i64* %base, %offset, %Pg, i32 %prfop) + +define void @llvm_aarch64_sve_gather_prf_scaled_uxtw_p0i64_nx4vi32(i64* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_uxtw_p0i64_nx4vi32: +; CHECK: prfd pldl1strm, p0, [x0, z0.s, uxtw #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0i64.nx4vi32(i64* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0i64.nx4vi32(i64* %base, %offset, %Pg, i32 %prfop) +define void @llvm_aarch64_sve_gather_prf_scaled_sxtw_p0i64_nx4vi32(i64* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_sxtw_p0i64_nx4vi32: 
+; CHECK: prfd pldl1strm, p0, [x0, z0.s, sxtw #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0i64.nx4vi32(i64* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0f64.nx4vi32(double* %base, %offset, %Pg, i32 %prfop) + +define void @llvm_aarch64_sve_gather_prf_scaled_uxtw_p0f64_nx4vi32(double* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_uxtw_p0f64_nx4vi32: +; CHECK: prfd pldl1strm, p0, [x0, z0.s, uxtw #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0f64.nx4vi32(double* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0f64.nx4vi32(double* %base, %offset, %Pg, i32 %prfop) +define void @llvm_aarch64_sve_gather_prf_scaled_sxtw_p0f64_nx4vi32(double* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_sxtw_p0f64_nx4vi32: +; CHECK: prfd pldl1strm, p0, [x0, z0.s, sxtw #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0f64.nx4vi32(double* %base, %offset, %Pg, i32 1) + ret void + } + +; PRFD , , [, .D, #3] -> 32-bit unpacked scaled offset +declare void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0i64.nx2vi64(i64* %base, %offset, %Pg, i32 %prfop) +define void @llvm_aarch64_sve_gather_prf_scaled_uxtw_p0i64_nx2vi64(i64* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_uxtw_p0i64_nx2vi64: +; CHECK: prfd pldl1strm, p0, [x0, z0.d, uxtw #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0i64.nx2vi64(i64* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0i64.nx2vi64(i64* %base, %offset, %Pg, i32 %prfop) +define void @llvm_aarch64_sve_gather_prf_scaled_sxtw_p0i64_nx2vi64(i64* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_sxtw_p0i64_nx2vi64: +; CHECK: prfd pldl1strm, p0, [x0, z0.d, sxtw #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0i64.nx2vi64(i64* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0f64.nx2vi64(double* %base, %offset, %Pg, i32 %prfop) +define void @llvm_aarch64_sve_gather_prf_scaled_uxtw_p0f64_nx2vi64(double* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_uxtw_p0f64_nx2vi64: +; CHECK: prfd pldl1strm, p0, [x0, z0.d, uxtw #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0f64.nx2vi64(double* %base, %offset, %Pg, i32 1) + ret void + } + +declare void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0f64.nx2vi64(double* %base, %offset, %Pg, i32 %prfop) +define void @llvm_aarch64_sve_gather_prf_scaled_sxtw_p0f64_nx2vi64(double* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_sxtw_p0f64_nx2vi64: +; CHECK: prfd pldl1strm, p0, [x0, z0.d, sxtw #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.sxtw.p0f64.nx2vi64(double* %base, %offset, %Pg, i32 1) + ret void + } + +; PRFD , , [, .D] -> 64-bit scaled offset +declare void @llvm.aarch64.sve.gather.prf.scaled.p0i64.nx2vi64(i64* %base, %offset, %Pg, i32 %prfop) + +define void @llvm_aarch64_sve_gather_prf_scaled_p0i64_nx2vi64(i64* %base, %offset, %Pg) { +; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_p0i64_nx2vi64: +; CHECK: prfd pldl1strm, p0, [x0, z0.d, lsl #3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.gather.prf.scaled.p0i64.nx2vi64(i64* %base, %offset, %Pg, i32 1) + ret void + } + +declare void 
@llvm.aarch64.sve.gather.prf.scaled.p0f64.nx2vi64(double* %base, <vscale x 2 x i64> %offset, <vscale x 2 x i1> %Pg, i32 %prfop)
+
+define void @llvm_aarch64_sve_gather_prf_scaled_p0f64_nx2vi64(double* %base, <vscale x 2 x i64> %offset, <vscale x 2 x i1> %Pg) {
+; CHECK-LABEL: llvm_aarch64_sve_gather_prf_scaled_p0f64_nx2vi64:
+; CHECK: prfd pldl1strm, p0, [x0, z0.d, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.gather.prf.scaled.p0f64.nx2vi64(double* %base, <vscale x 2 x i64> %offset, <vscale x 2 x i1> %Pg, i32 1)
+  ret void
+ }
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-imm-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-imm-offset.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-imm-offset.ll
@@ -0,0 +1,90 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s | FileCheck %s
+
+; PRFB <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
+declare void @llvm.aarch64.sve.gather.prfb.nx4vi32(<vscale x 4 x i32> %bases, i64 %imm, <vscale x 4 x i1> %Pg, i32 %prfop)
+
+define void @llvm_aarch64_sve_gather_prfb_nx4vi32(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) {
+; CHECK-LABEL: llvm_aarch64_sve_gather_prfb_nx4vi32:
+; CHECK: prfb pldl1strm, p0, [z0.s, #7]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.gather.prfb.nx4vi32(<vscale x 4 x i32> %bases, i64 7, <vscale x 4 x i1> %Pg, i32 1)
+  ret void
+}
+
+; PRFB <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
+declare void @llvm.aarch64.sve.gather.prfb.nx2vi64(<vscale x 2 x i64> %bases, i64 %imm, <vscale x 2 x i1> %Pg, i32 %prfop)
+
+define void @llvm_aarch64_sve_gather_prfb_nx2vi64(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) {
+; CHECK-LABEL: llvm_aarch64_sve_gather_prfb_nx2vi64:
+; CHECK: prfb pldl1strm, p0, [z0.d, #7]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.gather.prfb.nx2vi64(<vscale x 2 x i64> %bases, i64 7, <vscale x 2 x i1> %Pg, i32 1)
+  ret void
+}
+
+; PRFH <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
+declare void @llvm.aarch64.sve.gather.prfh.nx4vi32(<vscale x 4 x i32> %bases, i64 %imm, <vscale x 4 x i1> %Pg, i32 %prfop)
+
+define void @llvm_aarch64_sve_gather_prfh_nx4vi32(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) {
+; CHECK-LABEL: llvm_aarch64_sve_gather_prfh_nx4vi32:
+; CHECK: prfh pldl1strm, p0, [z0.s, #6]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.gather.prfh.nx4vi32(<vscale x 4 x i32> %bases, i64 6, <vscale x 4 x i1> %Pg, i32 1)
+  ret void
+}
+
+; PRFH <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
+declare void @llvm.aarch64.sve.gather.prfh.nx2vi64(<vscale x 2 x i64> %bases, i64 %imm, <vscale x 2 x i1> %Pg, i32 %prfop)
+
+define void @llvm_aarch64_sve_gather_prfh_nx2vi64(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) {
+; CHECK-LABEL: llvm_aarch64_sve_gather_prfh_nx2vi64:
+; CHECK: prfh pldl1strm, p0, [z0.d, #6]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.gather.prfh.nx2vi64(<vscale x 2 x i64> %bases, i64 6, <vscale x 2 x i1> %Pg, i32 1)
+  ret void
+}
+
+; PRFW <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
+declare void @llvm.aarch64.sve.gather.prfw.nx4vi32(<vscale x 4 x i32> %bases, i64 %imm, <vscale x 4 x i1> %Pg, i32 %prfop)
+
+define void @llvm_aarch64_sve_gather_prfw_nx4vi32(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) {
+; CHECK-LABEL: llvm_aarch64_sve_gather_prfw_nx4vi32:
+; CHECK: prfw pldl1strm, p0, [z0.s, #12]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.gather.prfw.nx4vi32(<vscale x 4 x i32> %bases, i64 12, <vscale x 4 x i1> %Pg, i32 1)
+  ret void
+}
+
+; PRFW <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
+declare void @llvm.aarch64.sve.gather.prfw.nx2vi64(<vscale x 2 x i64> %bases, i64 %imm, <vscale x 2 x i1> %Pg, i32 %prfop)
+
+define void @llvm_aarch64_sve_gather_prfw_nx2vi64(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) {
+; CHECK-LABEL: llvm_aarch64_sve_gather_prfw_nx2vi64:
+; CHECK: prfw pldl1strm, p0, [z0.d, #12]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.gather.prfw.nx2vi64(<vscale x 2 x i64> %bases, i64 12, <vscale x 2 x i1> %Pg, i32 1)
+  ret void
+}
+
+; PRFD <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element
+declare void @llvm.aarch64.sve.gather.prfd.nx4vi32(<vscale x 4 x i32> %bases, i64 %imm, <vscale x 4 x i1> %Pg, i32 %prfop)
+
+define void @llvm_aarch64_sve_gather_prfd_nx4vi32(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) {
+; CHECK-LABEL: llvm_aarch64_sve_gather_prfd_nx4vi32:
+; CHECK: prfd pldl1strm, p0, [z0.s, #16]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.gather.prfd.nx4vi32(<vscale x 4 x i32> %bases, i64 16, <vscale x 4 x i1> %Pg, i32 1)
+  ret void
+}
+
+; PRFD <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element
+declare void @llvm.aarch64.sve.gather.prfd.nx2vi64(<vscale x 2 x i64> %bases, i64 %imm, <vscale x 2 x i1> %Pg, i32 %prfop)
+
+define void @llvm_aarch64_sve_gather_prfd_nx2vi64(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) {
+; CHECK-LABEL: llvm_aarch64_sve_gather_prfd_nx2vi64:
+; CHECK: prfd pldl1strm, p0, [z0.d, #16]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.gather.prfd.nx2vi64(<vscale x 2 x i64> %bases, i64 16, <vscale x 2 x i1> %Pg, i32 1)
+  ret void
+}
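
Usage note (not part of the patch): the scaled-offset intrinsics added above take a scalar base pointer, a scalable vector of offsets, a predicate with the same element count as the offset vector, and an immediate prfop; the prfop operand carries ImmArg<3>, so it must be a compile-time constant. In the SVE PRF prfop encoding, 0 is PLDL1KEEP and 1 is PLDL1STRM, which is why the tests above pass i32 1 and check for pldl1strm. The sketch below is a minimal IR example, assuming the same intrinsic name mangling the tests use (p0f32.nx4vi32 for a float* base with <vscale x 4 x i32> offsets); it simply issues a first-level "keep" prefetch for the elements a later gather would touch.

; Minimal usage sketch, assuming the mangling convention of the tests above.
declare void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0f32.nx4vi32(float* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %Pg, i32 %prfop)

define void @prefetch_gather_operands(float* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %Pg) {
  ; prfop 0 = PLDL1KEEP; the offsets are zero-extended (uxtw) and scaled by the
  ; element size, mirroring the addressing mode of the eventual gather load.
  call void @llvm.aarch64.sve.gather.prf.scaled.uxtw.p0f32.nx4vi32(float* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %Pg, i32 0)
  ret void
}

With the patterns added in AArch64SVEInstrInfo.td and SVEInstrFormats.td, this is expected to select to "prfw pldl1keep, p0, [x0, z0.s, uxtw #2]", matching the prfw tests above except for the prfop.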