diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1082,7 +1082,7 @@
                 ],
                 [IntrReadMem, IntrArgMemOnly]>;
 
-class AdvSIMD_GatherLoad_VecTorBase_Intrinsic
+class AdvSIMD_GatherLoad_VectorBase_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
                 [
                   LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -1124,7 +1124,7 @@
                   LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                   llvm_anyvector_ty, llvm_i64_ty
                 ],
-                [IntrWriteMem, IntrArgMemOnly, ImmArg<3>]>;
+                [IntrWriteMem, IntrArgMemOnly]>;
 
 class AdvSIMD_1VectorArg_Imm64_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
@@ -1464,57 +1464,59 @@ def int_aarch64_sve_punpklo : AdvSIMD_SVE_PUNPKHI_Intrinsic;
 
 //
-// Gather loads:
+// Gather loads: scalar base + vector offsets
 //
 
-// scalar + vector, 64 bit unscaled offsets
+// 64 bit unscaled offsets
 def int_aarch64_sve_ld1_gather : AdvSIMD_GatherLoad_64bitOffset_Intrinsic;
 
-// scalar + vector, 64 bit scaled offsets
+// 64 bit scaled offsets
 def int_aarch64_sve_ld1_gather_index : AdvSIMD_GatherLoad_64bitOffset_Intrinsic;
 
-// scalar + vector, 32 bit unscaled offsets, sign (sxtw) or zero (zxtw)
-// extended to 64 bits
+// 32 bit unscaled offsets, sign (sxtw) or zero (zxtw) extended to 64 bits
 def int_aarch64_sve_ld1_gather_sxtw : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;
 def int_aarch64_sve_ld1_gather_uxtw : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;
 
-// scalar + vector, 32 bit scaled offsets, sign (sxtw) or zero (zxtw) extended
-// to 64 bits
+// 32 bit scaled offsets, sign (sxtw) or zero (zxtw) extended to 64 bits
 def int_aarch64_sve_ld1_gather_sxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;
 def int_aarch64_sve_ld1_gather_uxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic;
 
-// vector base + immediate index
-def int_aarch64_sve_ld1_gather_imm : AdvSIMD_GatherLoad_VecTorBase_Intrinsic;
+//
+// Gather loads: vector base + scalar offset
+//
+
+def int_aarch64_sve_ld1_gather_scalar_index : AdvSIMD_GatherLoad_VectorBase_Intrinsic;
 
 //
-// Scatter stores:
+// Scatter stores: scalar base + vector offsets
 //
 
-// scalar + vector, 64 bit unscaled offsets
+// 64 bit unscaled offsets
 def int_aarch64_sve_st1_scatter : AdvSIMD_ScatterStore_64bitOffset_Intrinsic;
 
-// scalar + vector, 64 bit scaled offsets
+// 64 bit scaled offsets
 def int_aarch64_sve_st1_scatter_index : AdvSIMD_ScatterStore_64bitOffset_Intrinsic;
 
-// scalar + vector, 32 bit unscaled offsets, sign (sxtw) or zero (zxtw)
-// extended to 64 bits
+// 32 bit unscaled offsets, sign (sxtw) or zero (zxtw) extended to 64 bits
 def int_aarch64_sve_st1_scatter_sxtw : AdvSIMD_ScatterStore_32bitOffset_Intrinsic;
 def int_aarch64_sve_st1_scatter_uxtw : AdvSIMD_ScatterStore_32bitOffset_Intrinsic;
 
-// scalar + vector, 32 bit scaled offsets, sign (sxtw) or zero (zxtw) extended
-// to 64 bits
+// 32 bit scaled offsets, sign (sxtw) or zero (zxtw) extended to 64 bits
 def int_aarch64_sve_st1_scatter_sxtw_index : AdvSIMD_ScatterStore_32bitOffset_Intrinsic;
 def int_aarch64_sve_st1_scatter_uxtw_index : AdvSIMD_ScatterStore_32bitOffset_Intrinsic;
 
-// vector base + immediate index
-def int_aarch64_sve_st1_scatter_imm : AdvSIMD_ScatterStore_VectorBase_Intrinsic;
+//
+// Scatter stores: vector base + scalar index
+//
+
+def int_aarch64_sve_st1_scatter_scalar_index : AdvSIMD_ScatterStore_VectorBase_Intrinsic;
 
 //
 // SVE2 - Non-widening pairwise arithmetic
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12133,11 +12133,33 @@
   // Depending on the addressing mode, this is either a pointer or a vector of
   // pointers (that fits into one register)
-  const SDValue Base = N->getOperand(4);
-  // Depending on the addressing mode, this is either a single offset or a
-  // vector of offsets (that fits into one register)
+  SDValue Base = N->getOperand(4);
+  // Depending on the addressing mode, this is either a single offset/index or
+  // a vector of offsets (that fits into one register)
   SDValue Offset = N->getOperand(5);
 
+  // SST1_IMM requires that the offset is an immediate in the range
+  // [0, 31 x #SizeBytes], where #SizeBytes is the size in bytes of the stored
+  // items. This translates to the index in the ACLE intrinsic being in the
+  // range [0, 31]. For immediates outside that range and non-immediate scalar
+  // offsets use SST1 or SST1_UXTW instead.
+  if (Opcode == AArch64ISD::SST1_IMM) {
+    if (!isa<ConstantSDNode>(Offset.getNode()) ||
+        cast<ConstantSDNode>(Offset.getNode())->getZExtValue() > 31) {
+      if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
+        Opcode = AArch64ISD::SST1_UXTW;
+      else
+        Opcode = AArch64ISD::SST1;
+
+      // Switch from treating the offset as `index` to `offset in bytes`
+      Offset =
+          DAG.getNode(ISD::MUL, DL, MVT::i64, Offset,
+                      DAG.getConstant(SrcElVT.getStoreSize(), DL, MVT::i64));
+
+      std::swap(Base, Offset);
+    }
+  }
+
   auto &TLI = DAG.getTargetLoweringInfo();
   if (!TLI.isTypeLegal(Base.getValueType()))
     return SDValue();
@@ -12197,10 +12219,34 @@
   // Depending on the addressing mode, this is either a pointer or a vector of
   // pointers (that fits into one register)
-  const SDValue Base = N->getOperand(3);
-  // Depending on the addressing mode, this is either a single offset or a
-  // vector of offsets (that fits into one register)
-  const SDValue Offset = N->getOperand(4);
+  SDValue Base = N->getOperand(3);
+  // Depending on the addressing mode, this is either a single offset/index or
+  // a vector of offsets (that fits into one register)
+  SDValue Offset = N->getOperand(4);
+
+  // GLD1_IMM requires that the offset is an immediate in the range
+  // [0, 31 x #SizeBytes], where #SizeBytes is the size in bytes of the loaded
+  // items. This translates to the index in the ACLE intrinsic being in the
+  // range [0, 31]. For immediates outside that range and non-immediate scalar
+  // offsets use GLD1 or GLD1_UXTW instead.
+  if (Opcode == AArch64ISD::GLD1_IMM) {
+
+    if (!isa<ConstantSDNode>(Offset.getNode()) ||
+        cast<ConstantSDNode>(Offset.getNode())->getZExtValue() > 31) {
+
+      if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
+        Opcode = AArch64ISD::GLD1_UXTW;
+      else
+        Opcode = AArch64ISD::GLD1;
+
+      // Switch from treating the offset as `index` to `offset in bytes`
+      Offset =
+          DAG.getNode(ISD::MUL, DL, MVT::i64, Offset,
+                      DAG.getConstant(RetElVT.getStoreSize(), DL, MVT::i64));
+
+      std::swap(Base, Offset);
+    }
+  }
 
   if (!DAG.getTargetLoweringInfo().isTypeLegal(Base.getValueType()) ||
       !DAG.getTargetLoweringInfo().isTypeLegal(Offset.getValueType()))
@@ -12398,7 +12444,7 @@
     return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW_SCALED);
   case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
     return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED);
-  case Intrinsic::aarch64_sve_ld1_gather_imm:
+  case Intrinsic::aarch64_sve_ld1_gather_scalar_index:
     return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_IMM);
   case Intrinsic::aarch64_sve_st1_scatter:
     return performST1ScatterCombine(N, DAG, AArch64ISD::SST1);
@@ -12416,7 +12462,7 @@
   case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
     return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_UXTW_SCALED,
                                     /*OnlyPackedOffsets=*/false);
-  case Intrinsic::aarch64_sve_st1_scatter_imm:
+  case Intrinsic::aarch64_sve_st1_scatter_scalar_index:
     return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_IMM);
   default:
     break;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -628,16 +628,16 @@
   // Scatters using 32/64-bit pointers with offset, e.g.
   //   st1h z0.s, p0, [z0.s, #16]
-  defm SST1B_S : sve_mem_32b_sst_vi_ptrs<0b001, "st1b", timm0_31, AArch64st1_scatter_imm, nxv4i8>;
-  defm SST1H_S : sve_mem_32b_sst_vi_ptrs<0b011, "st1h", tuimm5s2, AArch64st1_scatter_imm, nxv4i16>;
-  defm SST1W   : sve_mem_32b_sst_vi_ptrs<0b101, "st1w", tuimm5s4, AArch64st1_scatter_imm, nxv4i32>;
+  defm SST1B_S : sve_mem_32b_sst_vi_ptrs<0b001, "st1b", imm0_31, AArch64st1_scatter_imm, nxv4i8>;
+  defm SST1H_S : sve_mem_32b_sst_vi_ptrs<0b011, "st1h", uimm5s2, AArch64st1_scatter_imm, nxv4i16>;
+  defm SST1W   : sve_mem_32b_sst_vi_ptrs<0b101, "st1w", uimm5s4, AArch64st1_scatter_imm, nxv4i32>;
 
   // Scatters using 32/64-bit pointers with offset, e.g.
   //   st1h z0.d, p0, [z0.d, #16]
-  defm SST1B_D : sve_mem_64b_sst_vi_ptrs<0b000, "st1b", timm0_31, AArch64st1_scatter_imm, nxv2i8>;
-  defm SST1H_D : sve_mem_64b_sst_vi_ptrs<0b010, "st1h", tuimm5s2, AArch64st1_scatter_imm, nxv2i16>;
-  defm SST1W_D : sve_mem_64b_sst_vi_ptrs<0b100, "st1w", tuimm5s4, AArch64st1_scatter_imm, nxv2i32>;
-  defm SST1D   : sve_mem_64b_sst_vi_ptrs<0b110, "st1d", tuimm5s8, AArch64st1_scatter_imm, nxv2i64>;
+  defm SST1B_D : sve_mem_64b_sst_vi_ptrs<0b000, "st1b", imm0_31, AArch64st1_scatter_imm, nxv2i8>;
+  defm SST1H_D : sve_mem_64b_sst_vi_ptrs<0b010, "st1h", uimm5s2, AArch64st1_scatter_imm, nxv2i16>;
+  defm SST1W_D : sve_mem_64b_sst_vi_ptrs<0b100, "st1w", uimm5s4, AArch64st1_scatter_imm, nxv2i32>;
+  defm SST1D   : sve_mem_64b_sst_vi_ptrs<0b110, "st1d", uimm5s8, AArch64st1_scatter_imm, nxv2i64>;
 
   // Scatters using unscaled 64-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d] diff --git a/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll b/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll --- a/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll +++ b/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll @@ -14,9 +14,9 @@ ; CHECK-NEXT: and z0.d, z1.d, z0.d ; CHECK-NEXT: st1b { z1.d }, p1, [x0] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64( %pg, - %base, - i64 16) + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i8.nxv2i64( %pg, + %base, + i64 16) %res1 = zext %load to %res2 = sext %load to call void @llvm.masked.store.nxv2i8( %load, @@ -37,9 +37,9 @@ ; CHECK-NEXT: sxtb z0.d, p0/m, z1.d ; CHECK-NEXT: st1b { z1.d }, p1, [x0] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64( %pg, - %base, - i64 16) + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i8.nxv2i64( %pg, + %base, + i64 16) %res = sext %load to call void @llvm.masked.store.nxv2i8( %load, *%res_out, @@ -60,9 +60,9 @@ ; CHECK-NEXT: and z0.d, z1.d, z0.d ; CHECK-NEXT: st1b { z1.d }, p1, [x0] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64( %pg, - %base, - i64 16) + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i8.nxv2i64( %pg, + %base, + i64 16) %res = zext %load to call void @llvm.masked.store.nxv2i8( %load, *%res_out, @@ -72,5 +72,5 @@ ret %res } -declare @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i8.nxv2i64(, , i64) declare void @llvm.masked.store.nxv2i8(, *, i32, ) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-index.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-index.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-index.ll @@ -0,0 +1,368 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; LD1B, LD1W, LD1H, LD1D: vector base + immediate index +; e.g. 
ld1h { z0.s }, p0/z, [z0.s, #16] +; + +; LD1B +define @gld1b_s_imm_index( %pg, %base) { +; CHECK-LABEL: gld1b_s_imm_index: +; CHECK: ld1b { z0.s }, p0/z, [z0.s, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i8.nxv4i32( %pg, + %base, + i64 16) + %res = zext %load to + ret %res +} + +define @gld1b_d_imm_index( %pg, %base) { +; CHECK-LABEL: gld1b_d_imm_index: +; CHECK: ld1b { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i8.nxv2i64( %pg, + %base, + i64 16) + %res = zext %load to + ret %res +} + +; LD1H +define @gld1h_s_imm_index( %pg, %base) { +; CHECK-LABEL: gld1h_s_imm_index: +; CHECK: ld1h { z0.s }, p0/z, [z0.s, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i16.nxv4i32( %pg, + %base, + i64 16) + %res = zext %load to + ret %res +} + +define @gld1h_d_imm_index( %pg, %base) { +; CHECK-LABEL: gld1h_d_imm_index: +; CHECK: ld1h { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i16.nxv2i64( %pg, + %base, + i64 16) + %res = zext %load to + ret %res +} + +; LD1W +define @gld1w_s_imm_index( %pg, %base) { +; CHECK-LABEL: gld1w_s_imm_index: +; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i32.nxv4i32( %pg, + %base, + i64 16) + ret %load +} + +define @gld1w_d_imm_index( %pg, %base) { +; CHECK-LABEL: gld1w_d_imm_index: +; CHECK: ld1w { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i32.nxv2i64( %pg, + %base, + i64 16) + %res = zext %load to + ret %res +} + +define @gld1w_s_imm_index_float( %pg, %base) { +; CHECK-LABEL: gld1w_s_imm_index_float: +; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4f32.nxv4i32( %pg, + %base, + i64 16) + ret %load +} + +; LD1D +define @gld1d_d_imm_index( %pg, %base) { +; CHECK-LABEL: gld1d_d_imm_index: +; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i64.nxv2i64( %pg, + %base, + i64 16) + ret %load +} + +define @gld1d_d_imm_index_double( %pg, %base) { +; CHECK-LABEL: gld1d_d_imm_index_double: +; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2f64.nxv2i64( %pg, + %base, + i64 16) + ret %load +} + +; +; LD1SB, LD1SW, LD1SH: vector base + immediate index +; e.g. 
ld1sh { z0.s }, p0/z, [z0.s, #16] +; + +; LD1SB +define @gld1sb_s_imm_index( %pg, %base) { +; CHECK-LABEL: gld1sb_s_imm_index: +; CHECK: ld1sb { z0.s }, p0/z, [z0.s, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i8.nxv4i32( %pg, + %base, + i64 16) + %res = sext %load to + ret %res +} + +define @gld1sb_d_imm_index( %pg, %base) { +; CHECK-LABEL: gld1sb_d_imm_index: +; CHECK: ld1sb { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i8.nxv2i64( %pg, + %base, + i64 16) + %res = sext %load to + ret %res +} + +; LD1SH +define @gld1sh_s_imm_index( %pg, %base) { +; CHECK-LABEL: gld1sh_s_imm_index: +; CHECK: ld1sh { z0.s }, p0/z, [z0.s, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i16.nxv4i32( %pg, + %base, + i64 16) + %res = sext %load to + ret %res +} + +define @gld1sh_d_imm_index( %pg, %base) { +; CHECK-LABEL: gld1sh_d_imm_index: +; CHECK: ld1sh { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i16.nxv2i64( %pg, + %base, + i64 16) + %res = sext %load to + ret %res +} + +; LD1SW +define @gld1sw_d_imm_index( %pg, %base) { +; CHECK-LABEL: gld1sw_d_imm_index: +; CHECK: ld1sw { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i32.nxv2i64( %pg, + %base, + i64 16) + %res = sext %load to + ret %res +} + +; +; LD1B, LD1W, LD1H, LD1D: vector base + out of range immediate index +; e.g. ld1b { z0.d }, p0/z, [x0, z0.d] +; + +; LD1B +define @gld1b_s_imm_index_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1b_s_imm_index_out_of_range: +; CHECK: mov w8, #32 +; CHECK-NEXT: ld1b { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i8.nxv4i32( %pg, + %base, + i64 32) + %res = zext %load to + ret %res +} + +define @gld1b_d_imm_index_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1b_d_imm_index_out_of_range: +; CHECK: mov w8, #32 +; CHECK-NEXT: ld1b { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i8.nxv2i64( %pg, + %base, + i64 32) + %res = zext %load to + ret %res +} + +; LD1H +define @gld1h_s_imm_index_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1h_s_imm_index_out_of_range: +; CHECK: mov w8, #64 +; CHECK-NEXT: ld1h { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i16.nxv4i32( %pg, + %base, + i64 32) + %res = zext %load to + ret %res +} + +define @gld1h_d_imm_index_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1h_d_imm_index_out_of_range: +; CHECK: mov w8, #64 +; CHECK-NEXT: ld1h { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i16.nxv2i64( %pg, + %base, + i64 32) + %res = zext %load to + ret %res +} + +; LD1W +define @gld1w_s_imm_index_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1w_s_imm_index_out_of_range: +; CHECK: mov w8, #128 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i32.nxv4i32( %pg, + %base, + i64 32) + ret %load +} + +define @gld1w_d_imm_index_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1w_d_imm_index_out_of_range: +; CHECK: mov w8, #128 +; CHECK-NEXT: ld1w { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i32.nxv2i64( %pg, + %base, + i64 32) + %res = zext %load to + ret 
%res +} + +define @gld1w_s_imm_index_out_of_range_float( %pg, %base) { +; CHECK-LABEL: gld1w_s_imm_index_out_of_range_float: +; CHECK: mov w8, #128 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4f32.nxv4i32( %pg, + %base, + i64 32) + ret %load +} + +; LD1D +define @gld1d_d_imm_index_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1d_d_imm_index_out_of_range: +; CHECK: mov w8, #256 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i64.nxv2i64( %pg, + %base, + i64 32) + ret %load +} + +define @gld1d_d_imm_index_out_of_range_double( %pg, %base) { +; CHECK-LABEL: gld1d_d_imm_index_out_of_range_double: +; CHECK: mov w8, #256 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2f64.nxv2i64( %pg, + %base, + i64 32) + ret %load +} + +; +; LD1SB, LD1SW, LD1SH: vector base + out of range immediate index +; e.g. ld1sb { z0.s }, p0/z, [x8, z0.s, uxtw] +; + +; LD1SB +define @gld1sb_s_imm_index_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1sb_s_imm_index_out_of_range: +; CHECK: mov w8, #32 +; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i8.nxv4i32( %pg, + %base, + i64 32) + %res = sext %load to + ret %res +} + +define @gld1sb_d_imm_index_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1sb_d_imm_index_out_of_range: +; CHECK: mov w8, #32 +; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i8.nxv2i64( %pg, + %base, + i64 32) + %res = sext %load to + ret %res +} + +; LD1SH +define @gld1sh_s_imm_index_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1sh_s_imm_index_out_of_range: +; CHECK: mov w8, #64 +; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i16.nxv4i32( %pg, + %base, + i64 32) + %res = sext %load to + ret %res +} + +define @gld1sh_d_imm_index_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1sh_d_imm_index_out_of_range: +; CHECK: mov w8, #64 +; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i16.nxv2i64( %pg, + %base, + i64 32) + %res = sext %load to + ret %res +} + +; LD1SW +define @gld1sw_d_imm_index_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1sw_d_imm_index_out_of_range: +; CHECK: mov w8, #128 +; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i32.nxv2i64( %pg, + %base, + i64 32) + %res = sext %load to + ret %res +} + +; LD1B/LD1SB +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i8.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i8.nxv2i64(, , i64) + +; LD1H/LD1SH +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i16.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i16.nxv2i64(, , i64) + +; LD1W/LD1SW +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i32.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i32.nxv2i64(, , i64) + +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4f32.nxv4i32(, , i64) + +; LD1D +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i64.nxv2i64(, , i64) + +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2f64.nxv2i64(, , i64) diff --git 
a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-index.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-index.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-index.ll @@ -0,0 +1,196 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; LD1B, LD1W, LD1H, LD1D: vector base + scalar index +; e.g. ld1b { z0.d }, p0/z, [x0, z0.d] +; + +; LD1B +define @gld1b_s_scalar_index( %pg, %base, i64 %index) { +; CHECK-LABEL: gld1b_s_scalar_index: +; CHECK: ld1b { z0.s }, p0/z, [x0, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i8.nxv4i32( %pg, + %base, + i64 %index) + %res = zext %load to + ret %res +} + +define @gld1b_d_scalar_index( %pg, %base, i64 %index) { +; CHECK-LABEL: gld1b_d_scalar_index: +; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i8.nxv2i64( %pg, + %base, + i64 %index) + %res = zext %load to + ret %res +} + +; LD1H +define @gld1h_s_scalar_index( %pg, %base, i64 %index) { +; CHECK-LABEL: gld1h_s_scalar_index: +; CHECK: lsl x8, x0, #1 +; CHECK-NEXT: ld1h { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i16.nxv4i32( %pg, + %base, + i64 %index) + %res = zext %load to + ret %res +} + +define @gld1h_d_scalar_index( %pg, %base, i64 %index) { +; CHECK-LABEL: gld1h_d_scalar_index: +; CHECK: lsl x8, x0, #1 +; CHECK-NEXT: ld1h { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i16.nxv2i64( %pg, + %base, + i64 %index) + %res = zext %load to + ret %res +} + +; LD1W +define @gld1w_s_scalar_index( %pg, %base, i64 %index) { +; CHECK-LABEL: gld1w_s_scalar_index: +; CHECK: lsl x8, x0, #2 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i32.nxv4i32( %pg, + %base, + i64 %index) + ret %load +} + +define @gld1w_d_scalar_index( %pg, %base, i64 %index) { +; CHECK-LABEL: gld1w_d_scalar_index: +; CHECK: lsl x8, x0, #2 +; CHECK-NEXT: ld1w { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i32.nxv2i64( %pg, + %base, + i64 %index) + %res = zext %load to + ret %res +} + +define @gld1w_s_scalar_index_float( %pg, %base, i64 %index) { +; CHECK-LABEL: gld1w_s_scalar_index_float: +; CHECK: lsl x8, x0, #2 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4f32.nxv4i32( %pg, + %base, + i64 %index) + ret %load +} + +; LD1D +define @gld1d_d_scalar_index( %pg, %base, i64 %index) { +; CHECK-LABEL: gld1d_d_scalar_index: +; CHECK: lsl x8, x0, #3 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i64.nxv2i64( %pg, + %base, + i64 %index) + ret %load +} + +define @gld1d_d_scalar_index_double( %pg, %base, i64 %index) { +; CHECK-LABEL: gld1d_d_scalar_index_double: +; CHECK: lsl x8, x0, #3 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2f64.nxv2i64( %pg, + %base, + i64 %index) + ret %load +} + +; LD1SB, LD1SW, LD1SH: vector base + scalar index +; e.g. 
ld1b { z0.d }, p0/z, [x0, z0.d] +; + +; LD1SB +define @gld1sb_s_scalar_index( %pg, %base, i64 %index) { +; CHECK-LABEL: gld1sb_s_scalar_index: +; CHECK: ld1sb { z0.s }, p0/z, [x0, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i8.nxv4i32( %pg, + %base, + i64 %index) + %res = sext %load to + ret %res +} + +define @gld1sb_d_scalar_index( %pg, %base, i64 %index) { +; CHECK-LABEL: gld1sb_d_scalar_index: +; CHECK: ld1sb { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i8.nxv2i64( %pg, + %base, + i64 %index) + %res = sext %load to + ret %res +} + +; LD1SH +define @gld1sh_s_scalar_index( %pg, %base, i64 %index) { +; CHECK-LABEL: gld1sh_s_scalar_index: +; CHECK: lsl x8, x0, #1 +; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i16.nxv4i32( %pg, + %base, + i64 %index) + %res = sext %load to + ret %res +} + +define @gld1sh_d_scalar_index( %pg, %base, i64 %index) { +; CHECK-LABEL: gld1sh_d_scalar_index: +; CHECK: lsl x8, x0, #1 +; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i16.nxv2i64( %pg, + %base, + i64 %index) + %res = sext %load to + ret %res +} + +; LD1SW +define @gld1sw_d_scalar_index( %pg, %base, i64 %index) { +; CHECK-LABEL: gld1sw_d_scalar_index: +; CHECK: lsl x8, x0, #2 +; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i32.nxv2i64( %pg, + %base, + i64 %index) + %res = sext %load to + ret %res +} + +; LD1B/LD1SB +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i8.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i8.nxv2i64(, , i64) + +; LD1H/LD1SH +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i16.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i16.nxv2i64(, , i64) + +; LD1W/LD1SW +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4i32.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i32.nxv2i64(, , i64) + +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv4f32.nxv4i32(, , i64) + +; LD1D +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2i64.nxv2i64(, , i64) + +declare @llvm.aarch64.sve.ld1.gather.scalar.index.nxv2f64.nxv2i64(, , i64) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll +++ /dev/null @@ -1,186 +0,0 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s - -; -; LD1B, LD1W, LD1H, LD1D: vector + immediate (index) -; e.g. 
ld1h { z0.s }, p0/z, [z0.s, #16] -; - -; LD1B -define @gld1b_s_imm( %pg, %base) { -; CHECK-LABEL: gld1b_s_imm: -; CHECK: ld1b { z0.s }, p0/z, [z0.s, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4i8.nxv4i32( %pg, - %base, - i64 16) - %res = zext %load to - ret %res -} - -define @gld1b_d_imm( %pg, %base) { -; CHECK-LABEL: gld1b_d_imm: -; CHECK: ld1b { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64( %pg, - %base, - i64 16) - %res = zext %load to - ret %res -} - -; LD1H -define @gld1h_s_imm( %pg, %base) { -; CHECK-LABEL: gld1h_s_imm: -; CHECK: ld1h { z0.s }, p0/z, [z0.s, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4i16.nxv4i32( %pg, - %base, - i64 16) - %res = zext %load to - ret %res -} - -define @gld1h_d_imm( %pg, %base) { -; CHECK-LABEL: gld1h_d_imm: -; CHECK: ld1h { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i16.nxv2i64( %pg, - %base, - i64 16) - %res = zext %load to - ret %res -} - -; LD1W -define @gld1w_s_imm( %pg, %base) { -; CHECK-LABEL: gld1w_s_imm: -; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4i32.nxv4i32( %pg, - %base, - i64 16) - ret %load -} - -define @gld1w_d_imm( %pg, %base) { -; CHECK-LABEL: gld1w_d_imm: -; CHECK: ld1w { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i32.nxv2i64( %pg, - %base, - i64 16) - %res = zext %load to - ret %res -} - -define @gld1w_s_imm_float( %pg, %base) { -; CHECK-LABEL: gld1w_s_imm_float: -; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4f32.nxv4i32( %pg, - %base, - i64 16) - ret %load -} - -; LD1D -define @gld1d_d_imm( %pg, %base) { -; CHECK-LABEL: gld1d_d_imm: -; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i64.nxv2i64( %pg, - %base, - i64 16) - ret %load -} - -define @gld1d_d_imm_double( %pg, %base) { -; CHECK-LABEL: gld1d_d_imm_double: -; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2f64.nxv2i64( %pg, - %base, - i64 16) - ret %load -} - -; LD1SB, LD1SW, LD1SH: vector + immediate (index) -; e.g. 
ld1sh { z0.s }, p0/z, [z0.s, #16] -; - -; LD1SB -define @gld1sb_s_imm( %pg, %base) { -; CHECK-LABEL: gld1sb_s_imm: -; CHECK: ld1sb { z0.s }, p0/z, [z0.s, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4i8.nxv4i32( %pg, - %base, - i64 16) - %res = sext %load to - ret %res -} - -define @gld1sb_d_imm( %pg, %base) { -; CHECK-LABEL: gld1sb_d_imm: -; CHECK: ld1sb { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64( %pg, - %base, - i64 16) - %res = sext %load to - ret %res -} - -; LD1SH -define @gld1sh_s_imm( %pg, %base) { -; CHECK-LABEL: gld1sh_s_imm: -; CHECK: ld1sh { z0.s }, p0/z, [z0.s, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4i16.nxv4i32( %pg, - %base, - i64 16) - %res = sext %load to - ret %res -} - -define @gld1sh_d_imm( %pg, %base) { -; CHECK-LABEL: gld1sh_d_imm: -; CHECK: ld1sh { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i16.nxv2i64( %pg, - %base, - i64 16) - %res = sext %load to - ret %res -} - -; LD1SW -define @gld1sw_d_imm( %pg, %base) { -; CHECK-LABEL: gld1sw_d_imm: -; CHECK: ld1sw { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i32.nxv2i64( %pg, - %base, - i64 16) - %res = sext %load to - ret %res -} - -; LD1B/LD1SB -declare @llvm.aarch64.sve.ld1.gather.imm.nxv4i8.nxv4i32(, , i64) -declare @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(, , i64) - -; LD1H/LD1SH -declare @llvm.aarch64.sve.ld1.gather.imm.nxv4i16.nxv4i32(, , i64) -declare @llvm.aarch64.sve.ld1.gather.imm.nxv2i16.nxv2i64(, , i64) - -; LD1W/LD1SW -declare @llvm.aarch64.sve.ld1.gather.imm.nxv4i32.nxv4i32(, , i64) -declare @llvm.aarch64.sve.ld1.gather.imm.nxv2i32.nxv2i64(, , i64) - -declare @llvm.aarch64.sve.ld1.gather.imm.nxv4f32.nxv4i32(, , i64) - -; LD1D -declare @llvm.aarch64.sve.ld1.gather.imm.nxv2i64.nxv2i64(, , i64) - -declare @llvm.aarch64.sve.ld1.gather.imm.nxv2f64.nxv2i64(, , i64) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-index.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-index.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-index.ll @@ -0,0 +1,255 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; ST1B, ST1W, ST1H, ST1D: vector base + immediate index +; e.g. 
st1h { z0.s }, p0, [z1.s, #16] +; + +; ST1B +define void @sst1b_s_imm_index( %data, %pg, %base) { +; CHECK-LABEL: sst1b_s_imm_index: +; CHECK: st1b { z0.s }, p0, [z1.s, #16] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4i8.nxv4i32( %data_trunc, + %pg, + %base, + i64 16) + ret void +} + +define void @sst1b_d_imm_index( %data, %pg, %base) { +; CHECK-LABEL: sst1b_d_imm_index: +; CHECK: st1b { z0.d }, p0, [z1.d, #16] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i8.nxv2i64( %data_trunc, + %pg, + %base, + i64 16) + ret void +} + +; ST1H +define void @sst1h_s_imm_index( %data, %pg, %base) { +; CHECK-LABEL: sst1h_s_imm_index: +; CHECK: st1h { z0.s }, p0, [z1.s, #16] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4i16.nxv4i32( %data_trunc, + %pg, + %base, + i64 16) + ret void +} + +define void @sst1h_d_imm_index( %data, %pg, %base) { +; CHECK-LABEL: sst1h_d_imm_index: +; CHECK: st1h { z0.d }, p0, [z1.d, #16] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i16.nxv2i64( %data_trunc, + %pg, + %base, + i64 16) + ret void +} + +; ST1W +define void @sst1w_s_imm_index( %data, %pg, %base) { +; CHECK-LABEL: sst1w_s_imm_index: +; CHECK: st1w { z0.s }, p0, [z1.s, #16] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4i32.nxv4i32( %data, + %pg, + %base, + i64 16) + ret void +} + +define void @sst1w_d_imm_index( %data, %pg, %base) { +; CHECK-LABEL: sst1w_d_imm_index: +; CHECK: st1w { z0.d }, p0, [z1.d, #16] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i32.nxv2i64( %data_trunc, + %pg, + %base, + i64 16) + ret void +} + +define void @sst1w_s_imm_index_float( %data, %pg, %base) { +; CHECK-LABEL: sst1w_s_imm_index_float: +; CHECK: st1w { z0.s }, p0, [z1.s, #16] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4f32.nxv4i32( %data, + %pg, + %base, + i64 16) + ret void +} + +; ST1D +define void @sst1d_d_imm_index( %data, %pg, %base) { +; CHECK-LABEL: sst1d_d_imm_index: +; CHECK: st1d { z0.d }, p0, [z1.d, #16] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i64.nxv2i64( %data, + %pg, + %base, + i64 16) + ret void +} + +define void @sst1d_d_imm_index_double( %data, %pg, %base) { +; CHECK-LABEL: sst1d_d_imm_index_double: +; CHECK: st1d { z0.d }, p0, [z1.d, #16] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2f64.nxv2i64( %data, + %pg, + %base, + i64 16) + ret void +} + +; +; ST1B, ST1W, ST1H, ST1D: vector base + out of range immediate index +; e.g. 
st1h { z0.s }, p0, [z1.s, #16] +; + +; ST1B +define void @sst1b_s_imm_index_out_of_range( %data, %pg, %base) { +; CHECK-LABEL: sst1b_s_imm_index_out_of_range: +; CHECK: mov w8, #32 +; CHECK-NEXT: st1b { z0.s }, p0, [x8, z1.s, uxtw] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4i8.nxv4i32( %data_trunc, + %pg, + %base, + i64 32) + ret void +} + +define void @sst1b_d_imm_index_out_of_range( %data, %pg, %base) { +; CHECK-LABEL: sst1b_d_imm_index_out_of_range: +; CHECK: mov w8, #32 +; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i8.nxv2i64( %data_trunc, + %pg, + %base, + i64 32) + ret void +} + +; ST1H +define void @sst1h_s_imm_index_out_of_range( %data, %pg, %base) { +; CHECK-LABEL: sst1h_s_imm_index_out_of_range: +; CHECK: mov w8, #64 +; CHECK-NEXT: st1h { z0.s }, p0, [x8, z1.s, uxtw] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4i16.nxv4i32( %data_trunc, + %pg, + %base, + i64 32) + ret void +} + +define void @sst1h_d_imm_index_out_of_range( %data, %pg, %base) { +; CHECK-LABEL: sst1h_d_imm_index_out_of_range: +; CHECK: mov w8, #64 +; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i16.nxv2i64( %data_trunc, + %pg, + %base, + i64 32) + ret void +} + +; ST1W +define void @sst1w_s_imm_index_out_of_range( %data, %pg, %base) { +; CHECK-LABEL: sst1w_s_imm_index_out_of_range: +; CHECK: mov w8, #128 +; CHECK-NEXT: st1w { z0.s }, p0, [x8, z1.s, uxtw] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4i32.nxv4i32( %data, + %pg, + %base, + i64 32) + ret void +} + +define void @sst1w_d_imm_index_out_of_range( %data, %pg, %base) { +; CHECK-LABEL: sst1w_d_imm_index_out_of_range: +; CHECK: mov w8, #128 +; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i32.nxv2i64( %data_trunc, + %pg, + %base, + i64 32) + ret void +} + +define void @sst1w_s_imm_index_float_out_of_range( %data, %pg, %base) { +; CHECK-LABEL: sst1w_s_imm_index_float_out_of_range: +; CHECK: mov w8, #128 +; CHECK-NEXT: st1w { z0.s }, p0, [x8, z1.s, uxtw] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4f32.nxv4i32( %data, + %pg, + %base, + i64 32) + ret void +} + +; ST1D +define void @sst1d_d_imm_index_out_of_range( %data, %pg, %base) { +; CHECK-LABEL: sst1d_d_imm_index_out_of_range: +; CHECK: mov w8, #256 +; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i64.nxv2i64( %data, + %pg, + %base, + i64 32) + ret void +} + +define void @sst1d_d_imm_index_double_out_of_range( %data, %pg, %base) { +; CHECK-LABEL: sst1d_d_imm_index_double_out_of_range: +; CHECK: mov w8, #256 +; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2f64.nxv2i64( %data, + %pg, + %base, + i64 32) + ret void +} + +; ST1B +declare void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4i8.nxv4i32(, , , i64) +declare void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i8.nxv2i64(, , , i64) + +; ST1H +declare void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4i16.nxv4i32(, , , i64) +declare void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i16.nxv2i64(, , , 
i64) + +; ST1W +declare void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4i32.nxv4i32(, , , i64) +declare void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i32.nxv2i64(, , , i64) + +declare void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4f32.nxv4i32(, , , i64) + +; ST1D +declare void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i64.nxv2i64(, , , i64) + +declare void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2f64.nxv2i64(, , , i64) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-index.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-index.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-index.ll @@ -0,0 +1,140 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; ST1B, ST1W, ST1H, ST1D: vector base + scalar index +; e.g. st1h { z0.s }, p0, [x0, z1.d] +; + +; ST1B +define void @sst1b_s_scalar_index( %data, %pg, %base, i64 %index) { +; CHECK-LABEL: sst1b_s_scalar_index: +; CHECK: st1b { z0.s }, p0, [x0, z1.s, uxtw] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4i8.nxv4i32( %data_trunc, + %pg, + %base, + i64 %index) + ret void +} + +define void @sst1b_d_scalar_index( %data, %pg, %base, i64 %index) { +; CHECK-LABEL: sst1b_d_scalar_index: +; CHECK: st1b { z0.d }, p0, [x0, z1.d] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i8.nxv2i64( %data_trunc, + %pg, + %base, + i64 %index) + ret void +} + +; ST1H +define void @sst1h_s_scalar_index( %data, %pg, %base, i64 %index) { +; CHECK-LABEL: sst1h_s_scalar_index: +; CHECK: lsl x8, x0, #1 +; CHECK: st1h { z0.s }, p0, [x8, z1.s, uxtw] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4i16.nxv4i32( %data_trunc, + %pg, + %base, + i64 %index) + ret void +} + +define void @sst1h_d_scalar_index( %data, %pg, %base, i64 %index) { +; CHECK-LABEL: sst1h_d_scalar_index: +; CHECK: lsl x8, x0, #1 +; CHECK: st1h { z0.d }, p0, [x8, z1.d] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i16.nxv2i64( %data_trunc, + %pg, + %base, + i64 %index) + ret void +} + +; ST1W +define void @sst1w_s_scalar_index( %data, %pg, %base, i64 %index) { +; CHECK-LABEL: sst1w_s_scalar_index: +; CHECK: lsl x8, x0, #2 +; CHECK: st1w { z0.s }, p0, [x8, z1.s, uxtw] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4i32.nxv4i32( %data, + %pg, + %base, + i64 %index) + ret void +} + +define void @sst1w_d_scalar_index( %data, %pg, %base, i64 %index) { +; CHECK-LABEL: sst1w_d_scalar_index: +; CHECK: lsl x8, x0, #2 +; CHECK: st1w { z0.d }, p0, [x8, z1.d] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i32.nxv2i64( %data_trunc, + %pg, + %base, + i64 %index) + ret void +} + +define void @sst1w_s_scalar_index_float( %data, %pg, %base, i64 %index) { +; CHECK-LABEL: sst1w_s_scalar_index_float: +; CHECK: lsl x8, x0, #2 +; CHECK: st1w { z0.s }, p0, [x8, z1.s, uxtw] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4f32.nxv4i32( %data, + %pg, + %base, + i64 %index) + ret void +} + +; ST1D +define void @sst1d_d_scalar_index( %data, %pg, %base, i64 %index) { +; CHECK-LABEL: sst1d_d_scalar_index: +; CHECK: lsl x8, x0, #3 +; CHECK: st1d { z0.d }, p0, [x8, z1.d] +; CHECK-NEXT: ret + 
call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i64.nxv2i64( %data, + %pg, + %base, + i64 %index) + ret void +} + +define void @sst1d_d_scalar_index_double( %data, %pg, %base, i64 %index) { +; CHECK-LABEL: sst1d_d_scalar_index_double: +; CHECK: lsl x8, x0, #3 +; CHECK: st1d { z0.d }, p0, [x8, z1.d] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2f64.nxv2i64( %data, + %pg, + %base, + i64 %index) + ret void +} + +; ST1B +declare void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4i8.nxv4i32(, , , i64) +declare void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i8.nxv2i64(, , , i64) + +; ST1H +declare void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4i16.nxv4i32(, , , i64) +declare void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i16.nxv2i64(, , , i64) + +; ST1W +declare void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4i32.nxv4i32(, , , i64) +declare void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i32.nxv2i64(, , , i64) + +declare void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv4f32.nxv4i32(, , , i64) + +; ST1D +declare void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2i64.nxv2i64(, , , i64) + +declare void @llvm.aarch64.sve.st1.scatter.scalar.index.nxv2f64.nxv2i64(, , , i64) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base.ll +++ /dev/null @@ -1,133 +0,0 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s - -; -; ST1B, ST1W, ST1H, ST1D: vector + immediate (index) -; e.g. st1h { z0.s }, p0, [z1.s, #16] -; - -; ST1B -define void @sst1b_s_imm( %data, %pg, %base) { -; CHECK-LABEL: sst1b_s_imm: -; CHECK: st1b { z0.s }, p0, [z1.s, #16] -; CHECK-NEXT: ret - %data_trunc = trunc %data to - call void @llvm.aarch64.sve.st1.scatter.imm.nxv4i8.nxv4i32( %data_trunc, - %pg, - %base, - i64 16) - ret void -} - -define void @sst1b_d_imm( %data, %pg, %base) { -; CHECK-LABEL: sst1b_d_imm: -; CHECK: st1b { z0.d }, p0, [z1.d, #16] -; CHECK-NEXT: ret - %data_trunc = trunc %data to - call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i8.nxv2i64( %data_trunc, - %pg, - %base, - i64 16) - ret void -} - -; ST1H -define void @sst1h_s_imm( %data, %pg, %base) { -; CHECK-LABEL: sst1h_s_imm: -; CHECK: st1h { z0.s }, p0, [z1.s, #16] -; CHECK-NEXT: ret - %data_trunc = trunc %data to - call void @llvm.aarch64.sve.st1.scatter.imm.nxv4i16.nxv4i32( %data_trunc, - %pg, - %base, - i64 16) - ret void -} - -define void @sst1h_d_imm( %data, %pg, %base) { -; CHECK-LABEL: sst1h_d_imm: -; CHECK: st1h { z0.d }, p0, [z1.d, #16] -; CHECK-NEXT: ret - %data_trunc = trunc %data to - call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i16.nxv2i64( %data_trunc, - %pg, - %base, - i64 16) - ret void -} - -; ST1W -define void @sst1w_s_imm( %data, %pg, %base) { -; CHECK-LABEL: sst1w_s_imm: -; CHECK: st1w { z0.s }, p0, [z1.s, #16] -; CHECK-NEXT: ret - call void @llvm.aarch64.sve.st1.scatter.imm.nxv4i32.nxv4i32( %data, - %pg, - %base, - i64 16) - ret void -} - -define void @sst1w_d_imm( %data, %pg, %base) { -; CHECK-LABEL: sst1w_d_imm: -; CHECK: st1w { z0.d }, p0, [z1.d, #16] -; CHECK-NEXT: ret - %data_trunc = trunc %data to - call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i32.nxv2i64( %data_trunc, - %pg, - %base, - i64 16) - ret void -} - -define void @sst1w_s_imm_float( %data, %pg, %base) { -; CHECK-LABEL: sst1w_s_imm_float: -; CHECK: st1w { z0.s }, p0, [z1.s, #16] -; 
CHECK-NEXT: ret - call void @llvm.aarch64.sve.st1.scatter.imm.nxv4f32.nxv4i32( %data, - %pg, - %base, - i64 16) - ret void -} - -; ST1D -define void @sst1d_d_imm( %data, %pg, %base) { -; CHECK-LABEL: sst1d_d_imm: -; CHECK: st1d { z0.d }, p0, [z1.d, #16] -; CHECK-NEXT: ret - call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i64.nxv2i64( %data, - %pg, - %base, - i64 16) - ret void -} - -define void @sst1d_d_imm_double( %data, %pg, %base) { -; CHECK-LABEL: sst1d_d_imm_double: -; CHECK: st1d { z0.d }, p0, [z1.d, #16] -; CHECK-NEXT: ret - call void @llvm.aarch64.sve.st1.scatter.imm.nxv2f64.nxv2i64( %data, - %pg, - %base, - i64 16) - ret void -} - -; ST1B -declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4i8.nxv4i32(, , , i64) -declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i8.nxv2i64(, , , i64) - -; ST1H -declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4i16.nxv4i32(, , , i64) -declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i16.nxv2i64(, , , i64) - -; ST1W -declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4i32.nxv4i32(, , , i64) -declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i32.nxv2i64(, , , i64) - -declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4f32.nxv4i32(, , , i64) - -; ST1D -declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i64.nxv2i64(, , , i64) - -declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2f64.nxv2i64(, , , i64)