diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -1125,7 +1125,7 @@ ], [IntrReadMem, IntrArgMemOnly]>; -class AdvSIMD_GatherLoad_VecTorBase_Intrinsic +class AdvSIMD_GatherLoad_VectorBase_Intrinsic : Intrinsic<[llvm_anyvector_ty], [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, @@ -1161,7 +1161,7 @@ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyvector_ty, llvm_i64_ty ], - [IntrWriteMem, IntrArgMemOnly, ImmArg<3>]>; + [IntrWriteMem, IntrArgMemOnly]>; // // Loads @@ -1574,57 +1574,59 @@ def int_aarch64_sve_ptest_last : AdvSIMD_SVE_PTEST_Intrinsic; // -// Gather loads: +// Gather loads: scalar base + vector offsets // -// scalar + vector, 64 bit unscaled offsets +// 64 bit unscaled offsets def int_aarch64_sve_ld1_gather : AdvSIMD_GatherLoad_64bitOffset_Intrinsic; -// scalar + vector, 64 bit scaled offsets +// 64 bit scaled offsets def int_aarch64_sve_ld1_gather_index : AdvSIMD_GatherLoad_64bitOffset_Intrinsic; -// scalar + vector, 32 bit unscaled offsets, sign (sxtw) or zero (zxtw) -// extended to 64 bits +// 32 bit unscaled offsets, sign (sxtw) or zero (zxtw) extended to 64 bits def int_aarch64_sve_ld1_gather_sxtw : AdvSIMD_GatherLoad_32bitOffset_Intrinsic; def int_aarch64_sve_ld1_gather_uxtw : AdvSIMD_GatherLoad_32bitOffset_Intrinsic; -// scalar + vector, 32 bit scaled offsets, sign (sxtw) or zero (zxtw) extended -// to 64 bits +// 32 bit scaled offsets, sign (sxtw) or zero (zxtw) extended to 64 bits def int_aarch64_sve_ld1_gather_sxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic; def int_aarch64_sve_ld1_gather_uxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intrinsic; -// vector base + immediate index -def int_aarch64_sve_ld1_gather_imm : AdvSIMD_GatherLoad_VecTorBase_Intrinsic; +// +// Gather loads: vector base + scalar offset +// + +def int_aarch64_sve_ld1_gather_scalar_offset : AdvSIMD_GatherLoad_VectorBase_Intrinsic; // -// Scatter stores: +// Scatter stores: scalar base + vector offsets // -// scalar + vector, 64 bit unscaled offsets +// 64 bit unscaled offsets def int_aarch64_sve_st1_scatter : AdvSIMD_ScatterStore_64bitOffset_Intrinsic; -// scalar + vector, 64 bit scaled offsets +// 64 bit scaled offsets def int_aarch64_sve_st1_scatter_index : AdvSIMD_ScatterStore_64bitOffset_Intrinsic; -// scalar + vector, 32 bit unscaled offsets, sign (sxtw) or zero (zxtw) -// extended to 64 bits +// 32 bit unscaled offsets, sign (sxtw) or zero (zxtw) extended to 64 bits def int_aarch64_sve_st1_scatter_sxtw : AdvSIMD_ScatterStore_32bitOffset_Intrinsic; def int_aarch64_sve_st1_scatter_uxtw : AdvSIMD_ScatterStore_32bitOffset_Intrinsic; -// scalar + vector, 32 bit scaled offsets, sign (sxtw) or zero (zxtw) extended -// to 64 bits +// 32 bit scaled offsets, sign (sxtw) or zero (zxtw) extended to 64 bits def int_aarch64_sve_st1_scatter_sxtw_index : AdvSIMD_ScatterStore_32bitOffset_Intrinsic; def int_aarch64_sve_st1_scatter_uxtw_index : AdvSIMD_ScatterStore_32bitOffset_Intrinsic; -// vector base + immediate index -def int_aarch64_sve_st1_scatter_imm : AdvSIMD_ScatterStore_VectorBase_Intrinsic; +// +// Scatter stores: vector base + scalar offset +// + +def int_aarch64_sve_st1_scatter_scalar_offset : AdvSIMD_ScatterStore_VectorBase_Intrinsic; // // SVE2 - Non-widening pairwise arithmetic diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -12303,11 +12303,34 @@ // Depending on the addressing mode, this is either a pointer or a vector of // pointers (that fits into one register) - const SDValue Base = N->getOperand(4); + SDValue Base = N->getOperand(4); // Depending on the addressing mode, this is either a single offset or a // vector of offsets (that fits into one register) SDValue Offset = N->getOperand(5); + // SST1_IMM requires that the offset is an immediate: + // * multiple of #SizeInBytes + // * in the range [0, 31 x #SizeInBytes] + // where #SizeInBytes is the size in bytes of the stored items. For immediates + // outside that range and non-immediate scalar offsets use SST1 or SST1_UXTW + // instead. + if (Opcode == AArch64ISD::SST1_IMM) { + uint64_t MaxIndex = 31; + uint64_t SrcElSize = SrcElVT.getStoreSize().getKnownMinSize(); + + ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode()); + if (nullptr == OffsetConst || + OffsetConst->getZExtValue() > MaxIndex * SrcElSize || + OffsetConst->getZExtValue() % SrcElSize) { + if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy) + Opcode = AArch64ISD::SST1_UXTW; + else + Opcode = AArch64ISD::SST1; + + std::swap(Base, Offset); + } + } + auto &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isTypeLegal(Base.getValueType())) return SDValue(); @@ -12363,11 +12386,37 @@ // Depending on the addressing mode, this is either a pointer or a vector of // pointers (that fits into one register) - const SDValue Base = N->getOperand(3); + SDValue Base = N->getOperand(3); // Depending on the addressing mode, this is either a single offset or a // vector of offsets (that fits into one register) SDValue Offset = N->getOperand(4); + // GLD1_IMM requires that the offset is an immediate: + // * multiple of #SizeInBytes + // * in the range [0, 31 x #SizeInBytes] + // where #SizeInBytes is the size in bytes of the loaded items. For immediates + // outside that range and non-immediate scalar offsets use GLD1 or GLD1_UXTW + // instead. 
+ if (Opcode == AArch64ISD::GLD1_IMM) { + uint64_t MaxIndex = 31; + uint64_t RetElSize = RetVT.getVectorElementType() + .getSimpleVT() + .getStoreSize() + .getKnownMinSize(); + + ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode()); + if (nullptr == OffsetConst || + OffsetConst->getZExtValue() > MaxIndex * RetElSize || + OffsetConst->getZExtValue() % RetElSize) { + if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy) + Opcode = AArch64ISD::GLD1_UXTW; + else + Opcode = AArch64ISD::GLD1; + + std::swap(Base, Offset); + } + } + auto &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isTypeLegal(Base.getValueType())) return SDValue(); @@ -12573,7 +12622,7 @@ case Intrinsic::aarch64_sve_ld1_gather_uxtw_index: return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED, /*OnlyPackedOffsets=*/false); - case Intrinsic::aarch64_sve_ld1_gather_imm: + case Intrinsic::aarch64_sve_ld1_gather_scalar_offset: return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_IMM); case Intrinsic::aarch64_sve_st1_scatter: return performST1ScatterCombine(N, DAG, AArch64ISD::SST1); @@ -12591,7 +12640,7 @@ case Intrinsic::aarch64_sve_st1_scatter_uxtw_index: return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_UXTW_SCALED, /*OnlyPackedOffsets=*/false); - case Intrinsic::aarch64_sve_st1_scatter_imm: + case Intrinsic::aarch64_sve_st1_scatter_scalar_offset: return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_IMM); default: break; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -640,16 +640,16 @@ // Scatters using 32/64-bit pointers with offset, e.g. // st1h z0.s, p0, [z0.s, #16] - defm SST1B_S : sve_mem_32b_sst_vi_ptrs<0b001, "st1b", timm0_31, AArch64st1_scatter_imm, nxv4i8>; - defm SST1H_S : sve_mem_32b_sst_vi_ptrs<0b011, "st1h", tuimm5s2, AArch64st1_scatter_imm, nxv4i16>; - defm SST1W : sve_mem_32b_sst_vi_ptrs<0b101, "st1w", tuimm5s4, AArch64st1_scatter_imm, nxv4i32>; + defm SST1B_S : sve_mem_32b_sst_vi_ptrs<0b001, "st1b", imm0_31, AArch64st1_scatter_imm, nxv4i8>; + defm SST1H_S : sve_mem_32b_sst_vi_ptrs<0b011, "st1h", uimm5s2, AArch64st1_scatter_imm, nxv4i16>; + defm SST1W : sve_mem_32b_sst_vi_ptrs<0b101, "st1w", uimm5s4, AArch64st1_scatter_imm, nxv4i32>; // Scatters using 32/64-bit pointers with offset, e.g. // st1h z0.d, p0, [z0.d, #16] - defm SST1B_D : sve_mem_64b_sst_vi_ptrs<0b000, "st1b", timm0_31, AArch64st1_scatter_imm, nxv2i8>; - defm SST1H_D : sve_mem_64b_sst_vi_ptrs<0b010, "st1h", tuimm5s2, AArch64st1_scatter_imm, nxv2i16>; - defm SST1W_D : sve_mem_64b_sst_vi_ptrs<0b100, "st1w", tuimm5s4, AArch64st1_scatter_imm, nxv2i32>; - defm SST1D : sve_mem_64b_sst_vi_ptrs<0b110, "st1d", tuimm5s8, AArch64st1_scatter_imm, nxv2i64>; + defm SST1B_D : sve_mem_64b_sst_vi_ptrs<0b000, "st1b", imm0_31, AArch64st1_scatter_imm, nxv2i8>; + defm SST1H_D : sve_mem_64b_sst_vi_ptrs<0b010, "st1h", uimm5s2, AArch64st1_scatter_imm, nxv2i16>; + defm SST1W_D : sve_mem_64b_sst_vi_ptrs<0b100, "st1w", uimm5s4, AArch64st1_scatter_imm, nxv2i32>; + defm SST1D : sve_mem_64b_sst_vi_ptrs<0b110, "st1d", uimm5s8, AArch64st1_scatter_imm, nxv2i64>; // Scatters using unscaled 64-bit offsets, e.g. 
// st1h z0.d, p0, [x0, z0.d] diff --git a/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll b/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll --- a/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll +++ b/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll @@ -12,9 +12,9 @@ ; CHECK-NEXT: st1b { z0.d }, p1, [x0] ; CHECK-NEXT: and z0.d, z0.d, #0xff ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64( %pg, - %base, - i64 16) + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64( %pg, + %base, + i64 16) %res1 = zext %load to %res2 = sext %load to call void @llvm.masked.store.nxv2i8( %load, @@ -35,9 +35,9 @@ ; CHECK-NEXT: sxtb z0.d, p0/m, z1.d ; CHECK-NEXT: st1b { z1.d }, p1, [x0] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64( %pg, - %base, - i64 16) + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64( %pg, + %base, + i64 16) %res = sext %load to call void @llvm.masked.store.nxv2i8( %load, *%res_out, @@ -56,9 +56,9 @@ ; CHECK-NEXT: st1b { z0.d }, p1, [x0] ; CHECK-NEXT: and z0.d, z0.d, #0xff ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64( %pg, - %base, - i64 16) + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64( %pg, + %base, + i64 16) %res = zext %load to call void @llvm.masked.store.nxv2i8( %load, *%res_out, @@ -68,5 +68,5 @@ ret %res } -declare @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(, , i64) declare void @llvm.masked.store.nxv2i8(, *, i32, ) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-imm-offset.ll @@ -0,0 +1,368 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; LD1B, LD1W, LD1H, LD1D: vector base + immediate offset (index) +; e.g. 
ld1h { z0.s }, p0/z, [z0.s, #16] +; + +; LD1B +define @gld1b_s_imm_offset( %pg, %base) { +; CHECK-LABEL: gld1b_s_imm_offset: +; CHECK: ld1b { z0.s }, p0/z, [z0.s, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32( %pg, + %base, + i64 16) + %res = zext %load to + ret %res +} + +define @gld1b_d_imm_offset( %pg, %base) { +; CHECK-LABEL: gld1b_d_imm_offset: +; CHECK: ld1b { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64( %pg, + %base, + i64 16) + %res = zext %load to + ret %res +} + +; LD1H +define @gld1h_s_imm_offset( %pg, %base) { +; CHECK-LABEL: gld1h_s_imm_offset: +; CHECK: ld1h { z0.s }, p0/z, [z0.s, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32( %pg, + %base, + i64 16) + %res = zext %load to + ret %res +} + +define @gld1h_d_imm_offset( %pg, %base) { +; CHECK-LABEL: gld1h_d_imm_offset: +; CHECK: ld1h { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64( %pg, + %base, + i64 16) + %res = zext %load to + ret %res +} + +; LD1W +define @gld1w_s_imm_offset( %pg, %base) { +; CHECK-LABEL: gld1w_s_imm_offset: +; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32( %pg, + %base, + i64 16) + ret %load +} + +define @gld1w_d_imm_offset( %pg, %base) { +; CHECK-LABEL: gld1w_d_imm_offset: +; CHECK: ld1w { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64( %pg, + %base, + i64 16) + %res = zext %load to + ret %res +} + +define @gld1w_s_imm_offset_float( %pg, %base) { +; CHECK-LABEL: gld1w_s_imm_offset_float: +; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32( %pg, + %base, + i64 16) + ret %load +} + +; LD1D +define @gld1d_d_imm_offset( %pg, %base) { +; CHECK-LABEL: gld1d_d_imm_offset: +; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64( %pg, + %base, + i64 16) + ret %load +} + +define @gld1d_d_imm_offset_double( %pg, %base) { +; CHECK-LABEL: gld1d_d_imm_offset_double: +; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64( %pg, + %base, + i64 16) + ret %load +} + +; +; LD1SB, LD1SW, LD1SH: vector base + immediate offset (index) +; e.g. 
ld1sh { z0.s }, p0/z, [z0.s, #16] +; + +; LD1SB +define @gld1sb_s_imm_offset( %pg, %base) { +; CHECK-LABEL: gld1sb_s_imm_offset: +; CHECK: ld1sb { z0.s }, p0/z, [z0.s, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32( %pg, + %base, + i64 16) + %res = sext %load to + ret %res +} + +define @gld1sb_d_imm_offset( %pg, %base) { +; CHECK-LABEL: gld1sb_d_imm_offset: +; CHECK: ld1sb { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64( %pg, + %base, + i64 16) + %res = sext %load to + ret %res +} + +; LD1SH +define @gld1sh_s_imm_offset( %pg, %base) { +; CHECK-LABEL: gld1sh_s_imm_offset: +; CHECK: ld1sh { z0.s }, p0/z, [z0.s, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32( %pg, + %base, + i64 16) + %res = sext %load to + ret %res +} + +define @gld1sh_d_imm_offset( %pg, %base) { +; CHECK-LABEL: gld1sh_d_imm_offset: +; CHECK: ld1sh { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64( %pg, + %base, + i64 16) + %res = sext %load to + ret %res +} + +; LD1SW +define @gld1sw_d_imm_offset( %pg, %base) { +; CHECK-LABEL: gld1sw_d_imm_offset: +; CHECK: ld1sw { z0.d }, p0/z, [z0.d, #16] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64( %pg, + %base, + i64 16) + %res = sext %load to + ret %res +} + +; +; LD1B, LD1W, LD1H, LD1D: vector base + out of range immediate offset +; e.g. ld1b { z0.d }, p0/z, [x0, z0.d] +; + +; LD1B +define @gld1b_s_imm_offset_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1b_s_imm_offset_out_of_range: +; CHECK: mov w8, #32 +; CHECK-NEXT: ld1b { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32( %pg, + %base, + i64 32) + %res = zext %load to + ret %res +} + +define @gld1b_d_imm_offset_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1b_d_imm_offset_out_of_range: +; CHECK: mov w8, #32 +; CHECK-NEXT: ld1b { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64( %pg, + %base, + i64 32) + %res = zext %load to + ret %res +} + +; LD1H +define @gld1h_s_imm_offset_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1h_s_imm_offset_out_of_range: +; CHECK: mov w8, #63 +; CHECK-NEXT: ld1h { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32( %pg, + %base, + i64 63) + %res = zext %load to + ret %res +} + +define @gld1h_d_imm_offset_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1h_d_imm_offset_out_of_range: +; CHECK: mov w8, #63 +; CHECK-NEXT: ld1h { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64( %pg, + %base, + i64 63) + %res = zext %load to + ret %res +} + +; LD1W +define @gld1w_s_imm_offset_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1w_s_imm_offset_out_of_range: +; CHECK: mov w8, #125 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32( %pg, + %base, + i64 125) + ret %load +} + +define @gld1w_d_imm_offset_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1w_d_imm_offset_out_of_range: +; CHECK: mov w8, #125 +; CHECK-NEXT: ld1w { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64( %pg, + %base, + 
i64 125) + %res = zext %load to + ret %res +} + +define @gld1w_s_imm_offset_out_of_range_float( %pg, %base) { +; CHECK-LABEL: gld1w_s_imm_offset_out_of_range_float: +; CHECK: mov w8, #125 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32( %pg, + %base, + i64 125) + ret %load +} + +; LD1D +define @gld1d_d_imm_offset_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1d_d_imm_offset_out_of_range: +; CHECK: mov w8, #249 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64( %pg, + %base, + i64 249) + ret %load +} + +define @gld1d_d_imm_offset_out_of_range_double( %pg, %base) { +; CHECK-LABEL: gld1d_d_imm_offset_out_of_range_double: +; CHECK: mov w8, #249 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64( %pg, + %base, + i64 249) + ret %load +} + +; +; LD1SB, LD1SW, LD1SH: vector base + out of range immediate offset +; e.g. ld1sb { z0.s }, p0/z, [x8, z0.s, uxtw] +; + +; LD1SB +define @gld1sb_s_imm_offset_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1sb_s_imm_offset_out_of_range: +; CHECK: mov w8, #32 +; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32( %pg, + %base, + i64 32) + %res = sext %load to + ret %res +} + +define @gld1sb_d_imm_offset_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1sb_d_imm_offset_out_of_range: +; CHECK: mov w8, #32 +; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64( %pg, + %base, + i64 32) + %res = sext %load to + ret %res +} + +; LD1SH +define @gld1sh_s_imm_offset_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1sh_s_imm_offset_out_of_range: +; CHECK: mov w8, #63 +; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x8, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32( %pg, + %base, + i64 63) + %res = sext %load to + ret %res +} + +define @gld1sh_d_imm_offset_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1sh_d_imm_offset_out_of_range: +; CHECK: mov w8, #63 +; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64( %pg, + %base, + i64 63) + %res = sext %load to + ret %res +} + +; LD1SW +define @gld1sw_d_imm_offset_out_of_range( %pg, %base) { +; CHECK-LABEL: gld1sw_d_imm_offset_out_of_range: +; CHECK: mov w8, #125 +; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x8, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64( %pg, + %base, + i64 125) + %res = sext %load to + ret %res +} + +; LD1B/LD1SB +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(, , i64) + +; LD1H/LD1SH +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(, , i64) + +; LD1W/LD1SW +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(, , i64) + +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(, , i64) + +; LD1D +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(, , i64) + +declare 
@llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(, , i64) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll @@ -0,0 +1,186 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; LD1B, LD1W, LD1H, LD1D: vector base + scalar offset (index) +; e.g. ld1b { z0.d }, p0/z, [x0, z0.d] +; + +; LD1B +define @gld1b_s_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1b_s_scalar_offset: +; CHECK: ld1b { z0.s }, p0/z, [x0, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32( %pg, + %base, + i64 %offset) + %res = zext %load to + ret %res +} + +define @gld1b_d_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1b_d_scalar_offset: +; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64( %pg, + %base, + i64 %offset) + %res = zext %load to + ret %res +} + +; LD1H +define @gld1h_s_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1h_s_scalar_offset: +; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32( %pg, + %base, + i64 %offset) + %res = zext %load to + ret %res +} + +define @gld1h_d_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1h_d_scalar_offset: +; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64( %pg, + %base, + i64 %offset) + %res = zext %load to + ret %res +} + +; LD1W +define @gld1w_s_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1w_s_scalar_offset: +; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32( %pg, + %base, + i64 %offset) + ret %load +} + +define @gld1w_d_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1w_d_scalar_offset: +; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64( %pg, + %base, + i64 %offset) + %res = zext %load to + ret %res +} + +define @gld1w_s_scalar_offset_float( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1w_s_scalar_offset_float: +; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32( %pg, + %base, + i64 %offset) + ret %load +} + +; LD1D +define @gld1d_d_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1d_d_scalar_offset: +; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64( %pg, + %base, + i64 %offset) + ret %load +} + +define @gld1d_d_scalar_offset_double( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1d_d_scalar_offset_double: +; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64( %pg, + %base, + i64 %offset) + ret %load +} + +; LD1SB, LD1SW, LD1SH: vector base + scalar offset (index) +; e.g. 
ld1b { z0.d }, p0/z, [x0, z0.d] +; + +; LD1SB +define @gld1sb_s_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1sb_s_scalar_offset: +; CHECK: ld1sb { z0.s }, p0/z, [x0, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32( %pg, + %base, + i64 %offset) + %res = sext %load to + ret %res +} + +define @gld1sb_d_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1sb_d_scalar_offset: +; CHECK: ld1sb { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64( %pg, + %base, + i64 %offset) + %res = sext %load to + ret %res +} + +; LD1SH +define @gld1sh_s_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1sh_s_scalar_offset: +; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32( %pg, + %base, + i64 %offset) + %res = sext %load to + ret %res +} + +define @gld1sh_d_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1sh_d_scalar_offset: +; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64( %pg, + %base, + i64 %offset) + %res = sext %load to + ret %res +} + +; LD1SW +define @gld1sw_d_scalar_offset( %pg, %base, i64 %offset) { +; CHECK-LABEL: gld1sw_d_scalar_offset: +; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d] +; CHECK-NEXT: ret + %load = call @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64( %pg, + %base, + i64 %offset) + %res = sext %load to + ret %res +} + +; LD1B/LD1SB +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(, , i64) + +; LD1H/LD1SH +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(, , i64) + +; LD1W/LD1SW +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(, , i64) +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(, , i64) + +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(, , i64) + +; LD1D +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(, , i64) + +declare @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(, , i64) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base.ll +++ /dev/null @@ -1,186 +0,0 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s - -; -; LD1B, LD1W, LD1H, LD1D: vector + immediate (index) -; e.g. 
ld1h { z0.s }, p0/z, [z0.s, #16] -; - -; LD1B -define @gld1b_s_imm( %pg, %base) { -; CHECK-LABEL: gld1b_s_imm: -; CHECK: ld1b { z0.s }, p0/z, [z0.s, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4i8.nxv4i32( %pg, - %base, - i64 16) - %res = zext %load to - ret %res -} - -define @gld1b_d_imm( %pg, %base) { -; CHECK-LABEL: gld1b_d_imm: -; CHECK: ld1b { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64( %pg, - %base, - i64 16) - %res = zext %load to - ret %res -} - -; LD1H -define @gld1h_s_imm( %pg, %base) { -; CHECK-LABEL: gld1h_s_imm: -; CHECK: ld1h { z0.s }, p0/z, [z0.s, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4i16.nxv4i32( %pg, - %base, - i64 16) - %res = zext %load to - ret %res -} - -define @gld1h_d_imm( %pg, %base) { -; CHECK-LABEL: gld1h_d_imm: -; CHECK: ld1h { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i16.nxv2i64( %pg, - %base, - i64 16) - %res = zext %load to - ret %res -} - -; LD1W -define @gld1w_s_imm( %pg, %base) { -; CHECK-LABEL: gld1w_s_imm: -; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4i32.nxv4i32( %pg, - %base, - i64 16) - ret %load -} - -define @gld1w_d_imm( %pg, %base) { -; CHECK-LABEL: gld1w_d_imm: -; CHECK: ld1w { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i32.nxv2i64( %pg, - %base, - i64 16) - %res = zext %load to - ret %res -} - -define @gld1w_s_imm_float( %pg, %base) { -; CHECK-LABEL: gld1w_s_imm_float: -; CHECK: ld1w { z0.s }, p0/z, [z0.s, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4f32.nxv4i32( %pg, - %base, - i64 16) - ret %load -} - -; LD1D -define @gld1d_d_imm( %pg, %base) { -; CHECK-LABEL: gld1d_d_imm: -; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i64.nxv2i64( %pg, - %base, - i64 16) - ret %load -} - -define @gld1d_d_imm_double( %pg, %base) { -; CHECK-LABEL: gld1d_d_imm_double: -; CHECK: ld1d { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2f64.nxv2i64( %pg, - %base, - i64 16) - ret %load -} - -; LD1SB, LD1SW, LD1SH: vector + immediate (index) -; e.g. 
ld1sh { z0.s }, p0/z, [z0.s, #16] -; - -; LD1SB -define @gld1sb_s_imm( %pg, %base) { -; CHECK-LABEL: gld1sb_s_imm: -; CHECK: ld1sb { z0.s }, p0/z, [z0.s, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4i8.nxv4i32( %pg, - %base, - i64 16) - %res = sext %load to - ret %res -} - -define @gld1sb_d_imm( %pg, %base) { -; CHECK-LABEL: gld1sb_d_imm: -; CHECK: ld1sb { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64( %pg, - %base, - i64 16) - %res = sext %load to - ret %res -} - -; LD1SH -define @gld1sh_s_imm( %pg, %base) { -; CHECK-LABEL: gld1sh_s_imm: -; CHECK: ld1sh { z0.s }, p0/z, [z0.s, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv4i16.nxv4i32( %pg, - %base, - i64 16) - %res = sext %load to - ret %res -} - -define @gld1sh_d_imm( %pg, %base) { -; CHECK-LABEL: gld1sh_d_imm: -; CHECK: ld1sh { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i16.nxv2i64( %pg, - %base, - i64 16) - %res = sext %load to - ret %res -} - -; LD1SW -define @gld1sw_d_imm( %pg, %base) { -; CHECK-LABEL: gld1sw_d_imm: -; CHECK: ld1sw { z0.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.imm.nxv2i32.nxv2i64( %pg, - %base, - i64 16) - %res = sext %load to - ret %res -} - -; LD1B/LD1SB -declare @llvm.aarch64.sve.ld1.gather.imm.nxv4i8.nxv4i32(, , i64) -declare @llvm.aarch64.sve.ld1.gather.imm.nxv2i8.nxv2i64(, , i64) - -; LD1H/LD1SH -declare @llvm.aarch64.sve.ld1.gather.imm.nxv4i16.nxv4i32(, , i64) -declare @llvm.aarch64.sve.ld1.gather.imm.nxv2i16.nxv2i64(, , i64) - -; LD1W/LD1SW -declare @llvm.aarch64.sve.ld1.gather.imm.nxv4i32.nxv4i32(, , i64) -declare @llvm.aarch64.sve.ld1.gather.imm.nxv2i32.nxv2i64(, , i64) - -declare @llvm.aarch64.sve.ld1.gather.imm.nxv4f32.nxv4i32(, , i64) - -; LD1D -declare @llvm.aarch64.sve.ld1.gather.imm.nxv2i64.nxv2i64(, , i64) - -declare @llvm.aarch64.sve.ld1.gather.imm.nxv2f64.nxv2i64(, , i64) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-offset.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-imm-offset.ll @@ -0,0 +1,255 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; ST1B, ST1W, ST1H, ST1D: vector base + immediate offset +; e.g. 
st1h { z0.s }, p0, [z1.s, #16] +; + +; ST1B +define void @sst1b_s_imm_offset( %data, %pg, %base) { +; CHECK-LABEL: sst1b_s_imm_offset: +; CHECK: st1b { z0.s }, p0, [z1.s, #16] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32( %data_trunc, + %pg, + %base, + i64 16) + ret void +} + +define void @sst1b_d_imm_offset( %data, %pg, %base) { +; CHECK-LABEL: sst1b_d_imm_offset: +; CHECK: st1b { z0.d }, p0, [z1.d, #16] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64( %data_trunc, + %pg, + %base, + i64 16) + ret void +} + +; ST1H +define void @sst1h_s_imm_offset( %data, %pg, %base) { +; CHECK-LABEL: sst1h_s_imm_offset: +; CHECK: st1h { z0.s }, p0, [z1.s, #16] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32( %data_trunc, + %pg, + %base, + i64 16) + ret void +} + +define void @sst1h_d_imm_offset( %data, %pg, %base) { +; CHECK-LABEL: sst1h_d_imm_offset: +; CHECK: st1h { z0.d }, p0, [z1.d, #16] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64( %data_trunc, + %pg, + %base, + i64 16) + ret void +} + +; ST1W +define void @sst1w_s_imm_offset( %data, %pg, %base) { +; CHECK-LABEL: sst1w_s_imm_offset: +; CHECK: st1w { z0.s }, p0, [z1.s, #16] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32( %data, + %pg, + %base, + i64 16) + ret void +} + +define void @sst1w_d_imm_offset( %data, %pg, %base) { +; CHECK-LABEL: sst1w_d_imm_offset: +; CHECK: st1w { z0.d }, p0, [z1.d, #16] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64( %data_trunc, + %pg, + %base, + i64 16) + ret void +} + +define void @sst1w_s_imm_offset_float( %data, %pg, %base) { +; CHECK-LABEL: sst1w_s_imm_offset_float: +; CHECK: st1w { z0.s }, p0, [z1.s, #16] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32( %data, + %pg, + %base, + i64 16) + ret void +} + +; ST1D +define void @sst1d_d_imm_offset( %data, %pg, %base) { +; CHECK-LABEL: sst1d_d_imm_offset: +; CHECK: st1d { z0.d }, p0, [z1.d, #16] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64( %data, + %pg, + %base, + i64 16) + ret void +} + +define void @sst1d_d_imm_offset_double( %data, %pg, %base) { +; CHECK-LABEL: sst1d_d_imm_offset_double: +; CHECK: st1d { z0.d }, p0, [z1.d, #16] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64( %data, + %pg, + %base, + i64 16) + ret void +} + +; +; ST1B, ST1W, ST1H, ST1D: vector base + out of range immediate offset +; e.g. 
st1h { z0.s }, p0, [z1.s, #16] +; + +; ST1B +define void @sst1b_s_imm_offset_out_of_range( %data, %pg, %base) { +; CHECK-LABEL: sst1b_s_imm_offset_out_of_range: +; CHECK: mov w8, #32 +; CHECK-NEXT: st1b { z0.s }, p0, [x8, z1.s, uxtw] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32( %data_trunc, + %pg, + %base, + i64 32) + ret void +} + +define void @sst1b_d_imm_offset_out_of_range( %data, %pg, %base) { +; CHECK-LABEL: sst1b_d_imm_offset_out_of_range: +; CHECK: mov w8, #32 +; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64( %data_trunc, + %pg, + %base, + i64 32) + ret void +} + +; ST1H +define void @sst1h_s_imm_offset_out_of_range( %data, %pg, %base) { +; CHECK-LABEL: sst1h_s_imm_offset_out_of_range: +; CHECK: mov w8, #63 +; CHECK-NEXT: st1h { z0.s }, p0, [x8, z1.s, uxtw] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32( %data_trunc, + %pg, + %base, + i64 63) + ret void +} + +define void @sst1h_d_imm_offset_out_of_range( %data, %pg, %base) { +; CHECK-LABEL: sst1h_d_imm_offset_out_of_range: +; CHECK: mov w8, #63 +; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64( %data_trunc, + %pg, + %base, + i64 63) + ret void +} + +; ST1W +define void @sst1w_s_imm_offset_out_of_range( %data, %pg, %base) { +; CHECK-LABEL: sst1w_s_imm_offset_out_of_range: +; CHECK: mov w8, #125 +; CHECK-NEXT: st1w { z0.s }, p0, [x8, z1.s, uxtw] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32( %data, + %pg, + %base, + i64 125) + ret void +} + +define void @sst1w_d_imm_offset_out_of_range( %data, %pg, %base) { +; CHECK-LABEL: sst1w_d_imm_offset_out_of_range: +; CHECK: mov w8, #125 +; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64( %data_trunc, + %pg, + %base, + i64 125) + ret void +} + +define void @sst1w_s_imm_offset_float_out_of_range( %data, %pg, %base) { +; CHECK-LABEL: sst1w_s_imm_offset_float_out_of_range: +; CHECK: mov w8, #125 +; CHECK-NEXT: st1w { z0.s }, p0, [x8, z1.s, uxtw] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32( %data, + %pg, + %base, + i64 125) + ret void +} + +; ST1D +define void @sst1d_d_imm_offset_out_of_range( %data, %pg, %base) { +; CHECK-LABEL: sst1d_d_imm_offset_out_of_range: +; CHECK: mov w8, #249 +; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64( %data, + %pg, + %base, + i64 249) + ret void +} + +define void @sst1d_d_imm_offset_double_out_of_range( %data, %pg, %base) { +; CHECK-LABEL: sst1d_d_imm_offset_double_out_of_range: +; CHECK: mov w8, #249 +; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64( %data, + %pg, + %base, + i64 249) + ret void +} + +; ST1B +declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32(, , , i64) +declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64(, , , i64) + +; ST1H +declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32(, , , i64) +declare void 
@llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64(, , , i64) + +; ST1W +declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(, , , i64) +declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64(, , , i64) + +declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(, , , i64) + +; ST1D +declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(, , , i64) + +declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(, , , i64) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-offset.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base-scalar-offset.ll @@ -0,0 +1,133 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; ST1B, ST1W, ST1H, ST1D: vector base + scalar offset +; e.g. st1h { z0.s }, p0, [x0, z1.d] +; + +; ST1B +define void @sst1b_s_scalar_offset( %data, %pg, %base, i64 %offset) { +; CHECK-LABEL: sst1b_s_scalar_offset: +; CHECK: st1b { z0.s }, p0, [x0, z1.s, uxtw] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32( %data_trunc, + %pg, + %base, + i64 %offset) + ret void +} + +define void @sst1b_d_scalar_offset( %data, %pg, %base, i64 %offset) { +; CHECK-LABEL: sst1b_d_scalar_offset: +; CHECK: st1b { z0.d }, p0, [x0, z1.d] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64( %data_trunc, + %pg, + %base, + i64 %offset) + ret void +} + +; ST1H +define void @sst1h_s_scalar_offset( %data, %pg, %base, i64 %offset) { +; CHECK-LABEL: sst1h_s_scalar_offset: +; CHECK: st1h { z0.s }, p0, [x0, z1.s, uxtw] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32( %data_trunc, + %pg, + %base, + i64 %offset) + ret void +} + +define void @sst1h_d_scalar_offset( %data, %pg, %base, i64 %offset) { +; CHECK-LABEL: sst1h_d_scalar_offset: +; CHECK: st1h { z0.d }, p0, [x0, z1.d] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64( %data_trunc, + %pg, + %base, + i64 %offset) + ret void +} + +; ST1W +define void @sst1w_s_scalar_offset( %data, %pg, %base, i64 %offset) { +; CHECK-LABEL: sst1w_s_scalar_offset: +; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32( %data, + %pg, + %base, + i64 %offset) + ret void +} + +define void @sst1w_d_scalar_offset( %data, %pg, %base, i64 %offset) { +; CHECK-LABEL: sst1w_d_scalar_offset: +; CHECK: st1w { z0.d }, p0, [x0, z1.d] +; CHECK-NEXT: ret + %data_trunc = trunc %data to + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64( %data_trunc, + %pg, + %base, + i64 %offset) + ret void +} + +define void @sst1w_s_scalar_offset_float( %data, %pg, %base, i64 %offset) { +; CHECK-LABEL: sst1w_s_scalar_offset_float: +; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32( %data, + %pg, + %base, + i64 %offset) + ret void +} + +; ST1D +define void @sst1d_d_scalar_offset( %data, %pg, %base, i64 %offset) { +; CHECK-LABEL: sst1d_d_scalar_offset: +; CHECK: st1d { z0.d }, p0, [x0, z1.d] +; CHECK-NEXT: ret + call void 
@llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64( %data, + %pg, + %base, + i64 %offset) + ret void +} + +define void @sst1d_d_scalar_offset_double( %data, %pg, %base, i64 %offset) { +; CHECK-LABEL: sst1d_d_scalar_offset_double: +; CHECK: st1d { z0.d }, p0, [x0, z1.d] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64( %data, + %pg, + %base, + i64 %offset) + ret void +} + +; ST1B +declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32(, , , i64) +declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64(, , , i64) + +; ST1H +declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32(, , , i64) +declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64(, , , i64) + +; ST1W +declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32(, , , i64) +declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64(, , , i64) + +declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32(, , , i64) + +; ST1D +declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64(, , , i64) + +declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64(, , , i64) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scatter-stores-vector-base.ll +++ /dev/null @@ -1,133 +0,0 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s - -; -; ST1B, ST1W, ST1H, ST1D: vector + immediate (index) -; e.g. st1h { z0.s }, p0, [z1.s, #16] -; - -; ST1B -define void @sst1b_s_imm( %data, %pg, %base) { -; CHECK-LABEL: sst1b_s_imm: -; CHECK: st1b { z0.s }, p0, [z1.s, #16] -; CHECK-NEXT: ret - %data_trunc = trunc %data to - call void @llvm.aarch64.sve.st1.scatter.imm.nxv4i8.nxv4i32( %data_trunc, - %pg, - %base, - i64 16) - ret void -} - -define void @sst1b_d_imm( %data, %pg, %base) { -; CHECK-LABEL: sst1b_d_imm: -; CHECK: st1b { z0.d }, p0, [z1.d, #16] -; CHECK-NEXT: ret - %data_trunc = trunc %data to - call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i8.nxv2i64( %data_trunc, - %pg, - %base, - i64 16) - ret void -} - -; ST1H -define void @sst1h_s_imm( %data, %pg, %base) { -; CHECK-LABEL: sst1h_s_imm: -; CHECK: st1h { z0.s }, p0, [z1.s, #16] -; CHECK-NEXT: ret - %data_trunc = trunc %data to - call void @llvm.aarch64.sve.st1.scatter.imm.nxv4i16.nxv4i32( %data_trunc, - %pg, - %base, - i64 16) - ret void -} - -define void @sst1h_d_imm( %data, %pg, %base) { -; CHECK-LABEL: sst1h_d_imm: -; CHECK: st1h { z0.d }, p0, [z1.d, #16] -; CHECK-NEXT: ret - %data_trunc = trunc %data to - call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i16.nxv2i64( %data_trunc, - %pg, - %base, - i64 16) - ret void -} - -; ST1W -define void @sst1w_s_imm( %data, %pg, %base) { -; CHECK-LABEL: sst1w_s_imm: -; CHECK: st1w { z0.s }, p0, [z1.s, #16] -; CHECK-NEXT: ret - call void @llvm.aarch64.sve.st1.scatter.imm.nxv4i32.nxv4i32( %data, - %pg, - %base, - i64 16) - ret void -} - -define void @sst1w_d_imm( %data, %pg, %base) { -; CHECK-LABEL: sst1w_d_imm: -; CHECK: st1w { z0.d }, p0, [z1.d, #16] -; CHECK-NEXT: ret - %data_trunc = trunc %data to - call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i32.nxv2i64( %data_trunc, - %pg, - %base, - i64 16) - ret void -} - -define void @sst1w_s_imm_float( %data, %pg, %base) { -; CHECK-LABEL: sst1w_s_imm_float: -; CHECK: st1w { z0.s }, p0, [z1.s, #16] -; CHECK-NEXT: ret - call 
void @llvm.aarch64.sve.st1.scatter.imm.nxv4f32.nxv4i32( %data, - %pg, - %base, - i64 16) - ret void -} - -; ST1D -define void @sst1d_d_imm( %data, %pg, %base) { -; CHECK-LABEL: sst1d_d_imm: -; CHECK: st1d { z0.d }, p0, [z1.d, #16] -; CHECK-NEXT: ret - call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i64.nxv2i64( %data, - %pg, - %base, - i64 16) - ret void -} - -define void @sst1d_d_imm_double( %data, %pg, %base) { -; CHECK-LABEL: sst1d_d_imm_double: -; CHECK: st1d { z0.d }, p0, [z1.d, #16] -; CHECK-NEXT: ret - call void @llvm.aarch64.sve.st1.scatter.imm.nxv2f64.nxv2i64( %data, - %pg, - %base, - i64 16) - ret void -} - -; ST1B -declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4i8.nxv4i32(, , , i64) -declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i8.nxv2i64(, , , i64) - -; ST1H -declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4i16.nxv4i32(, , , i64) -declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i16.nxv2i64(, , , i64) - -; ST1W -declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4i32.nxv4i32(, , , i64) -declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i32.nxv2i64(, , , i64) - -declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4f32.nxv4i32(, , , i64) - -; ST1D -declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i64.nxv2i64(, , , i64) - -declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2f64.nxv2i64(, , , i64)