diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -1082,7 +1082,7 @@ ], [IntrReadMem, IntrArgMemOnly]>; -class AdvSIMD_GatherLoad_VecTorBase_Intrinsic +class AdvSIMD_GatherLoad_VectorBase_Intrinsic : Intrinsic<[llvm_anyvector_ty], [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, @@ -1464,27 +1464,28 @@ def int_aarch64_sve_punpklo : AdvSIMD_SVE_PUNPKHI_Intrinsic; // -// Gather loads: +// Gather loads: scalar base + vector offsets // -// scalar + vector, 64 bit unscaled offsets +// 64 bit unscaled offsets def int_aarch64_sve_ld1_gather : AdvSIMD_GatherLoad_64bitOffset_Intrinsic; -// scalar + vector, 64 bit scaled offsets +// 64 bit scaled offsets def int_aarch64_sve_ld1_gather_index : AdvSIMD_GatherLoad_64bitOffset_Intrinsic; -// scalar + vector, 32 bit unscaled offsets, sign (sxtw) or zero (zxtw) -// extended to 64 bits +// 32 bit unscaled offsets, sign (sxtw) or zero (zxtw) extended to 64 bits def int_aarch64_sve_ld1_gather_sxtw : AdvSIMD_GatherLoad_32bitOffset_Intrinsic; def int_aarch64_sve_ld1_gather_uxtw : AdvSIMD_GatherLoad_32bitOffset_Intrinsic; -// scalar + vector, 32 bit scaled offsets, sign (sxtw) or zero (zxtw) extended -// to 64 bits +// 32 bit scaled offsets, sign (sxtw) or zero (zxtw) extended to 64 bits def int_aarch64_sve_ld1_gather_sxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic; def int_aarch64_sve_ld1_gather_uxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic; -// vector base + immediate index -def int_aarch64_sve_ld1_gather_imm : AdvSIMD_GatherLoad_VecTorBase_Intrinsic; +// +// Gather loads: vector base + scalar offset +// + +def int_aarch64_sve_ld1_gather_scalar_offset : AdvSIMD_GatherLoad_VectorBase_Intrinsic; // // Scatter stores: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -12197,10 +12197,24 @@ // Depending on the addressing mode, this is either a pointer or a vector of // pointers (that fits into one register) - const SDValue Base = N->getOperand(3); + SDValue Base = N->getOperand(3); // Depending on the addressing mode, this is either a single offset or a // vector of offsets (that fits into one register) - const SDValue Offset = N->getOperand(4); + SDValue Offset = N->getOperand(4); + + // GLD1_IMM requires that the offset is an immediate in the range 0-31. For + // immediates outside that range and non-immediate scalar offsets use GLD1 or + // GLD1_UXTW instead. 
+  if (Opcode == AArch64ISD::GLD1_IMM &&
+      (!isa<ConstantSDNode>(Offset.getNode()) ||
+       cast<ConstantSDNode>(Offset.getNode())->getZExtValue() > 31)) {
+    if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
+      Opcode = AArch64ISD::GLD1_UXTW;
+    else
+      Opcode = AArch64ISD::GLD1;
+
+    std::swap(Base, Offset);
+  }
 
   if (!DAG.getTargetLoweringInfo().isTypeLegal(Base.getValueType()) ||
       !DAG.getTargetLoweringInfo().isTypeLegal(Offset.getValueType()))
@@ -12398,7 +12412,7 @@
     return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW_SCALED);
   case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
     return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED);
-  case Intrinsic::aarch64_sve_ld1_gather_imm:
+  case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
     return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_IMM);
   case Intrinsic::aarch64_sve_st1_scatter:
     return performST1ScatterCombine(N, DAG, AArch64ISD::SST1);
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll
@@ -0,0 +1,354 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; LD1B, LD1W, LD1H, LD1D: vector base + immediate offset (index)
+; e.g. ld1h { z0.s }, p0/z, [z0.s, #16]
+;
+
+; LD1B
+define <vscale x 4 x i32> @gld1b_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
+; CHECK-LABEL: gld1b_s_imm_offset:
+; CHECK: ld1b { z0.s }, p0/z, [z0.s, #16]
+; CHECK-NEXT: ret
+  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                                            <vscale x 4 x i32> %base,
+                                                                                            i64 16)
+  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @gld1b_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
+; CHECK-LABEL: gld1b_d_imm_offset:
+; CHECK: ld1b { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT: ret
+  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                                            <vscale x 2 x i64> %base,
+                                                                                            i64 16)
+  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+; LD1H
+define <vscale x 4 x i32> @gld1h_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
+; CHECK-LABEL: gld1h_s_imm_offset:
+; CHECK: ld1h { z0.s }, p0/z, [z0.s, #16]
+; CHECK-NEXT: ret
+  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                                              <vscale x 4 x i32> %base,
+                                                                                              i64 16)
+  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @gld1h_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
+; CHECK-LABEL: gld1h_d_imm_offset:
+; CHECK: ld1h { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT: ret
+  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                                              <vscale x 2 x i64> %base,
+                                                                                              i64 16)
+  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+; LD1W
+define <vscale x 4 x i32> @gld1w_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
+; CHECK-LABEL: gld1w_s_imm_offset:
+; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16]
+; CHECK-NEXT: ret
+  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                                              <vscale x 4 x i32> %base,
+                                                                                              i64 16)
+  ret <vscale x 4 x i32> %load
+}
+
+define <vscale x 2 x i64> @gld1w_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
+; CHECK-LABEL: gld1w_d_imm_offset:
+; CHECK: ld1w { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT: ret
+  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                                              <vscale x 2 x i64> %base,
+                                                                                              i64 16)
+  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 4 x float> @gld1w_s_imm_offset_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
+; CHECK-LABEL: gld1w_s_imm_offset_float:
+; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16]
+; CHECK-NEXT: ret
+  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                                                <vscale x 4 x i32> %base,
+                                                                                                i64 16)
+  ret <vscale x 4 x float> %load
+}
+
+; LD1D
+define <vscale x 2 x i64> @gld1d_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
+; CHECK-LABEL: gld1d_d_imm_offset:
+; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16]
+; CHECK-NEXT: ret
+  %load = call <vscale x 2 x i64> 
@llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64( %pg, + %base, + i64 16) + ret %load +} + +define @gld1d_d_imm_offset_double( %pg, %base) { +; CHECK-LABEL: gld1d_d_imm_offset_double: +; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64( %pg, + %base, + i64 16) + ret %load +} + +; +; LD1SB, LD1SW, LD1SH: vector base + immediate offset (index) +; e.g. ld1sh { z0.s }, p0/z, [z0.s, #16] +; + +; LD1SB +define @gld1sb_s_imm_offset( %pg, %base) { +; CHECK-LABEL: gld1sb_s_imm_offset: +; CHECK: ld1sb { z0.s }, p0/z, [z0.s, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32( %pg, + %base, + i64 16) + %res = sext %load to + ret %res +} + +define @gld1sb_d_imm_offset( %pg, %base) { +; CHECK-LABEL: gld1sb_d_imm_offset: +; CHECK: ld1sb { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64( %pg, + %base, + i64 16) + %res = sext %load to + ret %res +} + +; LD1SH +define @gld1sh_s_imm_offset( %pg, %base) { +; CHECK-LABEL: gld1sh_s_imm_offset: +; CHECK: ld1sh { z0.s }, p0/z, [z0.s, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32( %pg, + %base, + i64 16) + %res = sext %load to + ret %res +} + +define @gld1sh_d_imm_offset( %pg, %base) { +; CHECK-LABEL: gld1sh_d_imm_offset: +; CHECK: ld1sh { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64( %pg, + %base, + i64 16) + %res = sext %load to + ret %res +} + +; LD1SW +define @gld1sw_d_imm_offset( %pg, %base) { +; CHECK-LABEL: gld1sw_d_imm_offset: +; CHECK: ld1sw { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64( %pg, + %base, + i64 16) + %res = sext %load to + ret %res +} + +; +; LD1B, LD1W, LD1H, LD1D: vector base + out of range immediate offset +; e.g. 
ld1b { z0.d }, p0/z, [x0, z0.d] +; + +; LD1B +define @gld1b_s_imm_offset_oor( %pg, %base) { +; CHECK-LABEL: gld1b_s_imm_offset_oor: +; CHECK: ld1b { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32( %pg, + %base, + i64 32) + %res = zext %load to + ret %res +} + +define @gld1b_d_imm_offset_oor( %pg, %base) { +; CHECK-LABEL: gld1b_d_imm_offset_oor: +; CHECK: ld1b { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64( %pg, + %base, + i64 32) + %res = zext %load to + ret %res +} + +; LD1H +define @gld1h_s_imm_offset_oor( %pg, %base) { +; CHECK-LABEL: gld1h_s_imm_offset_oor: +; CHECK: ld1h { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32( %pg, + %base, + i64 32) + %res = zext %load to + ret %res +} + +define @gld1h_d_imm_offset_oor( %pg, %base) { +; CHECK-LABEL: gld1h_d_imm_offset_oor: +; CHECK: ld1h { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64( %pg, + %base, + i64 32) + %res = zext %load to + ret %res +} + +; LD1W +define @gld1w_s_imm_offset_oor( %pg, %base) { +; CHECK-LABEL: gld1w_s_imm_offset_oor: +; CHECK: ld1w { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32( %pg, + %base, + i64 32) + ret %load +} + +define @gld1w_d_imm_offset_oor( %pg, %base) { +; CHECK-LABEL: gld1w_d_imm_offset_oor: +; CHECK: ld1w { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64( %pg, + %base, + i64 32) + %res = zext %load to + ret %res +} + +define @gld1w_s_imm_offset_oor_float( %pg, %base) { +; CHECK-LABEL: gld1w_s_imm_offset_oor_float: +; CHECK: ld1w { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32( %pg, + %base, + i64 32) + ret %load +} + +; LD1D +define @gld1d_d_imm_offset_oor( %pg, %base) { +; CHECK-LABEL: gld1d_d_imm_offset_oor: +; CHECK: ld1d { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64( %pg, + %base, + i64 32) + ret %load +} + +define @gld1d_d_imm_offset_oor_double( %pg, %base) { +; CHECK-LABEL: gld1d_d_imm_offset_oor_double: +; CHECK: ld1d { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64( %pg, + %base, + i64 32) + ret %load +} + +; +; LD1SB, LD1SW, LD1SH: vector base + out of range immediate offset +; e.g. 
ld1sh { z0.s }, p0/z, [z0.s, #16] +; + +; LD1SB +define @gld1sb_s_imm_offset_oor( %pg, %base) { +; CHECK-LABEL: gld1sb_s_imm_offset_oor: +; CHECK: ld1sb { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32( %pg, + %base, + i64 32) + %res = sext %load to + ret %res +} + +define @gld1sb_d_imm_offset_oor( %pg, %base) { +; CHECK-LABEL: gld1sb_d_imm_offset_oor: +; CHECK: ld1sb { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64( %pg, + %base, + i64 32) + %res = sext %load to + ret %res +} + +; LD1SH +define @gld1sh_s_imm_offset_oor( %pg, %base) { +; CHECK-LABEL: gld1sh_s_imm_offset_oor: +; CHECK: ld1sh { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32( %pg, + %base, + i64 32) + %res = sext %load to + ret %res +} + +define @gld1sh_d_imm_offset_oor( %pg, %base) { +; CHECK-LABEL: gld1sh_d_imm_offset_oor: +; CHECK: ld1sh { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64( %pg, + %base, + i64 32) + %res = sext %load to + ret %res +} + +; LD1SW +define @gld1sw_d_imm_offset_oor( %pg, %base) { +; CHECK-LABEL: gld1sw_d_imm_offset_oor: +; CHECK: ld1sw { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64( %pg, + %base, + i64 32) + %res = sext %load to + ret %res +} + +; LD1B/LD1SB +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(, , i64) + +; LD1H/LD1SH +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(, , i64) + +; LD1W/LD1SW +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(, , i64) + +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(, , i64) + +; LD1D +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(, , i64) + +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(, , i64) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll @@ -0,0 +1,186 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; LD1B, LD1W, LD1H, LD1D: vector base + scalar offset (index) +; e.g. 
ld1b { z0.d }, p0/z, [x0, z0.d] +; + +; LD1B +define @gld1b_s_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1b_s_scalar_offset: +; CHECK: ld1b { z0.s }, p0/z, [x0, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32( %pg, + %base, + i64 %offset) + %res = zext %load to + ret %res +} + +define @gld1b_d_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1b_d_scalar_offset: +; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64( %pg, + %base, + i64 %offset) + %res = zext %load to + ret %res +} + +; LD1H +define @gld1h_s_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1h_s_scalar_offset: +; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32( %pg, + %base, + i64 %offset) + %res = zext %load to + ret %res +} + +define @gld1h_d_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1h_d_scalar_offset: +; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64( %pg, + %base, + i64 %offset) + %res = zext %load to + ret %res +} + +; LD1W +define @gld1w_s_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1w_s_scalar_offset: +; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32( %pg, + %base, + i64 %offset) + ret %load +} + +define @gld1w_d_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1w_d_scalar_offset: +; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64( %pg, + %base, + i64 %offset) + %res = zext %load to + ret %res +} + +define @gld1w_s_scalar_offset_float( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1w_s_scalar_offset_float: +; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32( %pg, + %base, + i64 %offset) + ret %load +} + +; LD1D +define @gld1d_d_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1d_d_scalar_offset: +; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64( %pg, + %base, + i64 %offset) + ret %load +} + +define @gld1d_d_scalar_offset_double( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1d_d_scalar_offset_double: +; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64( %pg, + %base, + i64 %offset) + ret %load +} + +; LD1SB, LD1SW, LD1SH: vector base + scalar offset (index) +; e.g. 
ld1b { z0.d }, p0/z, [x0, z0.d] +; + +; LD1SB +define @gld1sb_s_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1sb_s_scalar_offset: +; CHECK: ld1sb { z0.s }, p0/z, [x0, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32( %pg, + %base, + i64 %offset) + %res = sext %load to + ret %res +} + +define @gld1sb_d_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1sb_d_scalar_offset: +; CHECK: ld1sb { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64( %pg, + %base, + i64 %offset) + %res = sext %load to + ret %res +} + +; LD1SH +define @gld1sh_s_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1sh_s_scalar_offset: +; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32( %pg, + %base, + i64 %offset) + %res = sext %load to + ret %res +} + +define @gld1sh_d_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1sh_d_scalar_offset: +; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64( %pg, + %base, + i64 %offset) + %res = sext %load to + ret %res +} + +; LD1SW +define @gld1sw_d_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1sw_d_scalar_offset: +; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64( %pg, + %base, + i64 %offset) + %res = sext %load to + ret %res +} + +; LD1B/LD1SB +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(, , i64) + +; LD1H/LD1SH +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(, , i64) + +; LD1W/LD1SW +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(, , i64) + +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(, , i64) + +; LD1D +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(, , i64) + +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(, , i64) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll +++ /dev/null @@ -1,186 +0,0 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s - -; -; LD1B, LD1W, LD1H, LD1D: vector + immediate (index) -; e.g. 
ld1h { z0.s }, p0/z, [z0.s, #16] -; - -; LD1B -define @gld1b_s_imm( %pg, %base) { -; CHECK-LABEL: gld1b_s_imm: -; CHECK: ld1b { z0.s }, p0/z, [z0.s, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4i8.nxv4i32( %pg, - %base, - i64 16) - %res = zext %load to - ret %res -} - -define @gld1b_d_imm( %pg, %base) { -; CHECK-LABEL: gld1b_d_imm: -; CHECK: ld1b { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64( %pg, - %base, - i64 16) - %res = zext %load to - ret %res -} - -; LD1H -define @gld1h_s_imm( %pg, %base) { -; CHECK-LABEL: gld1h_s_imm: -; CHECK: ld1h { z0.s }, p0/z, [z0.s, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4i16.nxv4i32( %pg, - %base, - i64 16) - %res = zext %load to - ret %res -} - -define @gld1h_d_imm( %pg, %base) { -; CHECK-LABEL: gld1h_d_imm: -; CHECK: ld1h { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i16.nxv2i64( %pg, - %base, - i64 16) - %res = zext %load to - ret %res -} - -; LD1W -define @gld1w_s_imm( %pg, %base) { -; CHECK-LABEL: gld1w_s_imm: -; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4i32.nxv4i32( %pg, - %base, - i64 16) - ret %load -} - -define @gld1w_d_imm( %pg, %base) { -; CHECK-LABEL: gld1w_d_imm: -; CHECK: ld1w { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i32.nxv2i64( %pg, - %base, - i64 16) - %res = zext %load to - ret %res -} - -define @gld1w_s_imm_float( %pg, %base) { -; CHECK-LABEL: gld1w_s_imm_float: -; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4f32.nxv4i32( %pg, - %base, - i64 16) - ret %load -} - -; LD1D -define @gld1d_d_imm( %pg, %base) { -; CHECK-LABEL: gld1d_d_imm: -; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i64.nxv2i64( %pg, - %base, - i64 16) - ret %load -} - -define @gld1d_d_imm_double( %pg, %base) { -; CHECK-LABEL: gld1d_d_imm_double: -; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2f64.nxv2i64( %pg, - %base, - i64 16) - ret %load -} - -; LD1SB, LD1SW, LD1SH: vector + immediate (index) -; e.g. 
ld1sh { z0.s }, p0/z, [z0.s, #16] -; - -; LD1SB -define @gld1sb_s_imm( %pg, %base) { -; CHECK-LABEL: gld1sb_s_imm: -; CHECK: ld1sb { z0.s }, p0/z, [z0.s, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4i8.nxv4i32( %pg, - %base, - i64 16) - %res = sext %load to - ret %res -} - -define @gld1sb_d_imm( %pg, %base) { -; CHECK-LABEL: gld1sb_d_imm: -; CHECK: ld1sb { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64( %pg, - %base, - i64 16) - %res = sext %load to - ret %res -} - -; LD1SH -define @gld1sh_s_imm( %pg, %base) { -; CHECK-LABEL: gld1sh_s_imm: -; CHECK: ld1sh { z0.s }, p0/z, [z0.s, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4i16.nxv4i32( %pg, - %base, - i64 16) - %res = sext %load to - ret %res -} - -define @gld1sh_d_imm( %pg, %base) { -; CHECK-LABEL: gld1sh_d_imm: -; CHECK: ld1sh { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i16.nxv2i64( %pg, - %base, - i64 16) - %res = sext %load to - ret %res -} - -; LD1SW -define @gld1sw_d_imm( %pg, %base) { -; CHECK-LABEL: gld1sw_d_imm: -; CHECK: ld1sw { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i32.nxv2i64( %pg, - %base, - i64 16) - %res = sext %load to - ret %res -} - -; LD1B/LD1SB -declare @llvm.aarch64.sve.ld1.gather.imm.nxv4i8.nxv4i32(, , i64) -declare @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(, , i64) - -; LD1H/LD1SH -declare @llvm.aarch64.sve.ld1.gather.imm.nxv4i16.nxv4i32(, , i64) -declare @llvm.aarch64.sve.ld1.gather.imm.nxv2i16.nxv2i64(, , i64) - -; LD1W/LD1SW -declare @llvm.aarch64.sve.ld1.gather.imm.nxv4i32.nxv4i32(, , i64) -declare @llvm.aarch64.sve.ld1.gather.imm.nxv2i32.nxv2i64(, , i64) - -declare @llvm.aarch64.sve.ld1.gather.imm.nxv4f32.nxv4i32(, , i64) - -; LD1D -declare @llvm.aarch64.sve.ld1.gather.imm.nxv2i64.nxv2i64(, , i64) - -declare @llvm.aarch64.sve.ld1.gather.imm.nxv2f64.nxv2i64(, , i64)
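For reference, the standalone sketch below (hypothetical function names, not part of the patch) exercises the lowering paths of the renamed llvm.aarch64.sve.ld1.gather.scalar.offset intrinsic for 64-bit elements: an in-range immediate stays on the GLD1_IMM path, while an out-of-range immediate or a non-immediate scalar offset falls back to GLD1 with base and offset swapped (32-bit-element cases fall back to GLD1_UXTW instead, as in the _oor tests above). The expected instructions are those checked by the tests above, assuming the same llc -mtriple=aarch64-linux-gnu -mattr=+sve invocation.

; In-range immediate offset (0-31): stays on the GLD1_IMM path.
; Expected: ld1d { z0.d }, p0/z, [z0.d, #16]
define <vscale x 2 x i64> @example_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
  %r = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
  ret <vscale x 2 x i64> %r
}

; Out-of-range immediate offset: rewritten to GLD1, base and offset swapped.
; Expected: ld1d { z0.d }, p0/z, [x8, z0.d]
define <vscale x 2 x i64> @example_imm_offset_oor(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
  %r = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 32)
  ret <vscale x 2 x i64> %r
}

; Non-immediate scalar offset: also takes the GLD1 path.
; Expected: ld1d { z0.d }, p0/z, [x0, z0.d]
define <vscale x 2 x i64> @example_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
  %r = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
  ret <vscale x 2 x i64> %r
}

declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)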