diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1114,7 +1114,8 @@
     : Intrinsic<[llvm_anyvector_ty],
                 [
                   LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
-                  LLVMPointerToElt<0>, llvm_anyvector_ty
+                  LLVMPointerToElt<0>,
+                  LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>
                 ],
                 [IntrReadMem, IntrArgMemOnly]>;
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12231,18 +12231,14 @@
 }
 
 static SDValue performLD1GatherCombine(SDNode *N, SelectionDAG &DAG,
-                                       unsigned Opcode) {
+                                       unsigned Opcode,
+                                       bool OnlyPackedOffsets = true) {
   EVT RetVT = N->getValueType(0);
   assert(RetVT.isScalableVector() &&
          "Gather loads are only possible for SVE vectors");
   SDLoc DL(N);
-  MVT RetElVT = RetVT.getVectorElementType().getSimpleVT();
-  unsigned NumElements = AArch64::SVEBitsPerBlock / RetElVT.getSizeInBits();
-  EVT MaxVT = llvm::MVT::getScalableVectorVT(RetElVT, NumElements);
-  if (RetVT.getSizeInBits().getKnownMinSize() >
-      MaxVT.getSizeInBits().getKnownMinSize())
+  if (RetVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
     return SDValue();
 
   // Depending on the addressing mode, this is either a pointer or a vector of
@@ -12250,12 +12246,19 @@
   const SDValue Base = N->getOperand(3);
   // Depending on the addressing mode, this is either a single offset or a
   // vector of offsets (that fits into one register)
-  const SDValue Offset = N->getOperand(4);
+  SDValue Offset = N->getOperand(4);
 
-  if (!DAG.getTargetLoweringInfo().isTypeLegal(Base.getValueType()) ||
-      !DAG.getTargetLoweringInfo().isTypeLegal(Offset.getValueType()))
+  auto &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isTypeLegal(Base.getValueType()))
     return SDValue();
 
+  // Some gather load variants allow unpacked offsets, but only as nxv2i32
+  // vectors. These are implicitly sign (sxtw) or zero (uxtw) extended to
+  // nxv2i64. Legalize accordingly.
+  if (!OnlyPackedOffsets &&
+      Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
+    Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
+
   // Return value type that is representable in hardware
   EVT HwRetVt = getSVEContainerType(RetVT);
 
@@ -12439,13 +12442,17 @@
   case Intrinsic::aarch64_sve_ld1_gather_index:
     return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SCALED);
   case Intrinsic::aarch64_sve_ld1_gather_sxtw:
-    return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW);
+    return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW,
+                                   /*OnlyPackedOffsets=*/false);
   case Intrinsic::aarch64_sve_ld1_gather_uxtw:
-    return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW);
+    return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW,
+                                   /*OnlyPackedOffsets=*/false);
   case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
-    return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW_SCALED);
+    return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW_SCALED,
+                                   /*OnlyPackedOffsets=*/false);
   case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
-    return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED);
+    return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED,
+                                   /*OnlyPackedOffsets=*/false);
   case Intrinsic::aarch64_sve_ld1_gather_imm:
     return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_IMM);
   case Intrinsic::aarch64_sve_st1_scatter:
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-scaled-offsets.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-scaled-offsets.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-scaled-offsets.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-scaled-offsets.ll
@@ -11,9 +11,9 @@
 ; CHECK-LABEL: gld1h_s_uxtw_index:
 ; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw #1]
 ; CHECK-NEXT: ret
-  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                                          i16* %base,
-                                                                                          <vscale x 4 x i32> %b)
+  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
+                                                                                  i16* %base,
+                                                                                  <vscale x 4 x i32> %b)
   %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %res
 }
@@ -22,31 +22,31 @@
 ; CHECK-LABEL: gld1h_s_sxtw_index:
 ; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, sxtw #1]
 ; CHECK-NEXT: ret
-  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                                          i16* %base,
-                                                                                          <vscale x 4 x i32> %b)
+  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
+                                                                                  i16* %base,
+                                                                                  <vscale x 4 x i32> %b)
   %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %res
 }
 
-define <vscale x 2 x i64> @gld1h_d_uxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
+define <vscale x 2 x i64> @gld1h_d_uxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1h_d_uxtw_index:
 ; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
 ; CHECK-NEXT: ret
-  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
-                                                                                          i16* %base,
-                                                                                          <vscale x 2 x i64> %b)
+  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
+                                                                                  i16* %base,
+                                                                                  <vscale x 2 x i32> %b)
   %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %res
 }
 
-define <vscale x 2 x i64> @gld1h_d_sxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
+define <vscale x 2 x i64> @gld1h_d_sxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: gld1h_d_sxtw_index:
 ; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
 ; CHECK-NEXT: ret
-  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
-                                                                                          i16* %base,
-                                                                                          <vscale x 2 x i64> %b)
+  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
+                                                                                  i16* %base,
+                                                                                  <vscale x 2 x i32> %b)
   %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %res
 }
@@ -56,9 +56,9 @@
 ; CHECK-LABEL: gld1w_s_uxtw_index:
 ; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
 ; CHECK-NEXT: ret
-  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                                          i32* %base,
-                                                                                          <vscale x 4 x i32> %b)
+  %load = call
@llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32( %pg, + i32* %base, + %b) ret %load } @@ -66,30 +66,30 @@ ; CHECK-LABEL: gld1w_s_sxtw_index: ; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32.nxv4i32( %pg, - i32* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32( %pg, + i32* %base, + %b) ret %load } -define @gld1w_d_uxtw_index( %pg, i32* %base, %b) { +define @gld1w_d_uxtw_index( %pg, i32* %base, %b) { ; CHECK-LABEL: gld1w_d_uxtw_index: ; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32.nxv2i64( %pg, - i32* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32( %pg, + i32* %base, + %b) %res = zext %load to ret %res } -define @gld1w_d_sxtw_index( %pg, i32* %base, %b) { +define @gld1w_d_sxtw_index( %pg, i32* %base, %b) { ; CHECK-LABEL: gld1w_d_sxtw_index: ; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32.nxv2i64( %pg, - i32* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32( %pg, + i32* %base, + %b) %res = zext %load to ret %res } @@ -98,9 +98,9 @@ ; CHECK-LABEL: gld1w_s_uxtw_index_float: ; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32.nxv4i32( %pg, - float* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32( %pg, + float* %base, + %b) ret %load } @@ -108,50 +108,50 @@ ; CHECK-LABEL: gld1w_s_sxtw_index_float: ; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32.nxv4i32( %pg, - float* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32( %pg, + float* %base, + %b) ret %load } ; LD1D -define @gld1d_s_uxtw_index( %pg, i64* %base, %b) { +define @gld1d_s_uxtw_index( %pg, i64* %base, %b) { ; CHECK-LABEL: gld1d_s_uxtw_index: ; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64.nxv2i64( %pg, - i64* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64( %pg, + i64* %base, + %b) ret %load } -define @gld1d_sxtw_index( %pg, i64* %base, %b) { +define @gld1d_sxtw_index( %pg, i64* %base, %b) { ; CHECK-LABEL: gld1d_sxtw_index: ; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64.nxv2i64( %pg, - i64* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64( %pg, + i64* %base, + %b) ret %load } -define @gld1d_uxtw_index_double( %pg, double* %base, %b) { +define @gld1d_uxtw_index_double( %pg, double* %base, %b) { ; CHECK-LABEL: gld1d_uxtw_index_double: ; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64.nxv2i64( %pg, - double* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64( %pg, + double* %base, + %b) ret %load } -define @gld1d_sxtw_index_double( %pg, double* %base, %b) { +define @gld1d_sxtw_index_double( %pg, double* %base, %b) { ; CHECK-LABEL: gld1d_sxtw_index_double: ; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64.nxv2i64( %pg, - double* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64( 
%pg, + double* %base, + %b) ret %load } @@ -166,9 +166,9 @@ ; CHECK-LABEL: gld1sh_s_uxtw_index: ; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw #1] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16.nxv4i32( %pg, - i16* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16( %pg, + i16* %base, + %b) %res = sext %load to ret %res } @@ -177,79 +177,79 @@ ; CHECK-LABEL: gld1sh_s_sxtw_index: ; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw #1] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16.nxv4i32( %pg, - i16* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16( %pg, + i16* %base, + %b) %res = sext %load to ret %res } -define @gld1sh_d_uxtw_index( %pg, i16* %base, %b) { +define @gld1sh_d_uxtw_index( %pg, i16* %base, %b) { ; CHECK-LABEL: gld1sh_d_uxtw_index: ; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16.nxv2i64( %pg, - i16* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16( %pg, + i16* %base, + %b) %res = sext %load to ret %res } -define @gld1sh_d_sxtw_index( %pg, i16* %base, %b) { +define @gld1sh_d_sxtw_index( %pg, i16* %base, %b) { ; CHECK-LABEL: gld1sh_d_sxtw_index: ; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16.nxv2i64( %pg, - i16* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16( %pg, + i16* %base, + %b) %res = sext %load to ret %res } ; LD1SW -define @gld1sw_d_uxtw_index( %pg, i32* %base, %b) { +define @gld1sw_d_uxtw_index( %pg, i32* %base, %b) { ; CHECK-LABEL: gld1sw_d_uxtw_index: ; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32.nxv2i64( %pg, - i32* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32( %pg, + i32* %base, + %b) %res = sext %load to ret %res } -define @gld1sw_d_sxtw_index( %pg, i32* %base, %b) { +define @gld1sw_d_sxtw_index( %pg, i32* %base, %b) { ; CHECK-LABEL: gld1sw_d_sxtw_index: ; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32.nxv2i64( %pg, - i32* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32( %pg, + i32* %base, + %b) %res = sext %load to ret %res } ; LD1H/LD1SH -declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16.nxv4i32(, i16*, ) -declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16.nxv4i32(, i16*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(, i16*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(, i16*, ) -declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16.nxv2i64(, i16*, ) -declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16.nxv2i64(, i16*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(, i16*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(, i16*, ) ; LD1W/LD1SW -declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32.nxv4i32(, i32*, ) -declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32.nxv4i32(, i32*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(, i32*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(, i32*, ) -declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32.nxv2i64(, i32*, ) -declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32.nxv2i64(, i32*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32(, i32*, ) 
+declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32(, i32*, ) -declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32.nxv4i32(, float*, ) -declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32.nxv4i32(, float*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32(, float*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32(, float*, ) ; LD1D -declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64.nxv2i64(, i64*, ) -declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64.nxv2i64(, i64*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64(, i64*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64(, i64*, ) -declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64.nxv2i64(, double*, ) -declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64.nxv2i64(, double*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64(, double*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64(, double*, ) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-unscaled-offsets.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-unscaled-offsets.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-unscaled-offsets.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-unscaled-offsets.ll @@ -11,9 +11,9 @@ ; CHECK-LABEL: gld1b_s_uxtw: ; CHECK: ld1b { z0.s }, p0/z, [x0, z0.s, uxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8.nxv4i32( %pg, - i8* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8( %pg, + i8* %base, + %b) %res = zext %load to ret %res } @@ -22,31 +22,31 @@ ; CHECK-LABEL: gld1b_s_sxtw: ; CHECK: ld1b { z0.s }, p0/z, [x0, z0.s, sxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8.nxv4i32( %pg, - i8* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8( %pg, + i8* %base, + %b) %res = zext %load to ret %res } -define @gld1b_d_uxtw( %pg, i8* %base, %b) { +define @gld1b_d_uxtw( %pg, i8* %base, %b) { ; CHECK-LABEL: gld1b_d_uxtw: ; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d, uxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8.nxv2i64( %pg, - i8* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8( %pg, + i8* %base, + %b) %res = zext %load to ret %res } -define @gld1b_d_sxtw( %pg, i8* %base, %b) { +define @gld1b_d_sxtw( %pg, i8* %base, %b) { ; CHECK-LABEL: gld1b_d_sxtw: ; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d, sxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8.nxv2i64( %pg, - i8* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8( %pg, + i8* %base, + %b) %res = zext %load to ret %res } @@ -56,9 +56,9 @@ ; CHECK-LABEL: gld1h_s_uxtw: ; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16.nxv4i32( %pg, - i16* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16( %pg, + i16* %base, + %b) %res = zext %load to ret %res } @@ -67,31 +67,31 @@ ; CHECK-LABEL: gld1h_s_sxtw: ; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, sxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16.nxv4i32( %pg, - i16* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16( %pg, + i16* %base, + %b) %res = zext %load to ret %res } -define @gld1h_d_uxtw( %pg, i16* %base, %b) { +define @gld1h_d_uxtw( %pg, i16* %base, %b) { ; CHECK-LABEL: gld1h_d_uxtw: ; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw] ; CHECK-NEXT: ret - %load = call 
@llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16.nxv2i64( %pg, - i16* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16( %pg, + i16* %base, + %b) %res = zext %load to ret %res } -define @gld1h_d_sxtw( %pg, i16* %base, %b) { +define @gld1h_d_sxtw( %pg, i16* %base, %b) { ; CHECK-LABEL: gld1h_d_sxtw: ; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16.nxv2i64( %pg, - i16* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16( %pg, + i16* %base, + %b) %res = zext %load to ret %res } @@ -101,9 +101,9 @@ ; CHECK-LABEL: gld1w_s_uxtw: ; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32.nxv4i32( %pg, - i32* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32( %pg, + i32* %base, + %b) ret %load } @@ -111,30 +111,30 @@ ; CHECK-LABEL: gld1w_s_sxtw: ; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32.nxv4i32( %pg, - i32* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32( %pg, + i32* %base, + %b) ret %load } -define @gld1w_d_uxtw( %pg, i32* %base, %b) { +define @gld1w_d_uxtw( %pg, i32* %base, %b) { ; CHECK-LABEL: gld1w_d_uxtw: ; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32.nxv2i64( %pg, - i32* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32( %pg, + i32* %base, + %b) %res = zext %load to ret %res } -define @gld1w_d_sxtw( %pg, i32* %base, %b) { +define @gld1w_d_sxtw( %pg, i32* %base, %b) { ; CHECK-LABEL: gld1w_d_sxtw: ; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32.nxv2i64( %pg, - i32* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32( %pg, + i32* %base, + %b) %res = zext %load to ret %res } @@ -143,9 +143,9 @@ ; CHECK-LABEL: gld1w_s_uxtw_float: ; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32.nxv4i32( %pg, - float* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32( %pg, + float* %base, + %b) ret %load } @@ -153,50 +153,50 @@ ; CHECK-LABEL: gld1w_s_sxtw_float: ; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32.nxv4i32( %pg, - float* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32( %pg, + float* %base, + %b) ret %load } ; LD1D -define @gld1d_d_uxtw( %pg, i64* %base, %b) { +define @gld1d_d_uxtw( %pg, i64* %base, %b) { ; CHECK-LABEL: gld1d_d_uxtw: ; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i64.nxv2i64( %pg, - i64* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i64( %pg, + i64* %base, + %b) ret %load } -define @gld1d_d_sxtw( %pg, i64* %base, %b) { +define @gld1d_d_sxtw( %pg, i64* %base, %b) { ; CHECK-LABEL: gld1d_d_sxtw: ; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i64.nxv2i64( %pg, - i64* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i64( %pg, + i64* %base, + %b) ret %load } -define @gld1d_d_uxtw_double( %pg, double* %base, %b) { +define @gld1d_d_uxtw_double( %pg, double* %base, %b) { ; CHECK-LABEL: gld1d_d_uxtw_double: ; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw] ; CHECK-NEXT: ret 
- %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2f64.nxv2i64( %pg, - double* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2f64( %pg, + double* %base, + %b) ret %load } -define @gld1d_d_sxtw_double( %pg, double* %base, %b) { +define @gld1d_d_sxtw_double( %pg, double* %base, %b) { ; CHECK-LABEL: gld1d_d_sxtw_double: ; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2f64.nxv2i64( %pg, - double* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2f64( %pg, + double* %base, + %b) ret %load } @@ -211,9 +211,9 @@ ; CHECK-LABEL: gld1sb_s_uxtw: ; CHECK: ld1sb { z0.s }, p0/z, [x0, z0.s, uxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8.nxv4i32( %pg, - i8* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8( %pg, + i8* %base, + %b) %res = sext %load to ret %res } @@ -222,31 +222,31 @@ ; CHECK-LABEL: gld1sb_s_sxtw: ; CHECK: ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8.nxv4i32( %pg, - i8* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8( %pg, + i8* %base, + %b) %res = sext %load to ret %res } -define @gld1sb_d_uxtw( %pg, i8* %base, %b) { +define @gld1sb_d_uxtw( %pg, i8* %base, %b) { ; CHECK-LABEL: gld1sb_d_uxtw: ; CHECK: ld1sb { z0.d }, p0/z, [x0, z0.d, uxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8.nxv2i64( %pg, - i8* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8( %pg, + i8* %base, + %b) %res = sext %load to ret %res } -define @gld1sb_d_sxtw( %pg, i8* %base, %b) { +define @gld1sb_d_sxtw( %pg, i8* %base, %b) { ; CHECK-LABEL: gld1sb_d_sxtw: ; CHECK: ld1sb { z0.d }, p0/z, [x0, z0.d, sxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8.nxv2i64( %pg, - i8* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8( %pg, + i8* %base, + %b) %res = sext %load to ret %res } @@ -256,9 +256,9 @@ ; CHECK-LABEL: gld1sh_s_uxtw: ; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16.nxv4i32( %pg, - i16* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16( %pg, + i16* %base, + %b) %res = sext %load to ret %res } @@ -267,82 +267,82 @@ ; CHECK-LABEL: gld1sh_s_sxtw: ; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16.nxv4i32( %pg, - i16* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16( %pg, + i16* %base, + %b) %res = sext %load to ret %res } -define @gld1sh_d_uxtw( %pg, i16* %base, %b) { +define @gld1sh_d_uxtw( %pg, i16* %base, %b) { ; CHECK-LABEL: gld1sh_d_uxtw: ; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16.nxv2i64( %pg, - i16* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16( %pg, + i16* %base, + %b) %res = sext %load to ret %res } -define @gld1sh_d_sxtw( %pg, i16* %base, %b) { +define @gld1sh_d_sxtw( %pg, i16* %base, %b) { ; CHECK-LABEL: gld1sh_d_sxtw: ; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16.nxv2i64( %pg, - i16* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16( %pg, + i16* %base, + %b) %res = sext %load to ret %res } ; LD1SW -define @gld1sw_d_uxtw( %pg, i32* %base, %b) { +define @gld1sw_d_uxtw( %pg, i32* %base, %b) { 
; CHECK-LABEL: gld1sw_d_uxtw: ; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32.nxv2i64( %pg, - i32* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32( %pg, + i32* %base, + %b) %res = sext %load to ret %res } -define @gld1sw_d_sxtw( %pg, i32* %base, %b) { +define @gld1sw_d_sxtw( %pg, i32* %base, %b) { ; CHECK-LABEL: gld1sw_d_sxtw: ; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw] ; CHECK-NEXT: ret - %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32.nxv2i64( %pg, - i32* %base, - %b) + %load = call @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32( %pg, + i32* %base, + %b) %res = sext %load to ret %res } ; LD1B/LD1SB -declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8.nxv4i32(, i8*, ) -declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8.nxv2i64(, i8*, ) -declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8.nxv4i32(, i8*, ) -declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8.nxv2i64(, i8*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(, i8*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8(, i8*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(, i8*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8(, i8*, ) ; LD1H/LD1SH -declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16.nxv4i32(, i16*, ) -declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16.nxv2i64(, i16*, ) -declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16.nxv4i32(, i16*, ) -declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16.nxv2i64(, i16*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(, i16*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16(, i16*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(, i16*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16(, i16*, ) ; LD1W/LD1SW -declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32.nxv4i32(, i32*, ) -declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32.nxv2i64(, i32*, ) -declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32.nxv4i32(, i32*, ) -declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32.nxv2i64(, i32*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32(, i32*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32(, i32*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32(, i32*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32(, i32*, ) -declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32.nxv4i32(, float*, ) -declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32.nxv4i32(, float*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32(, float*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32(, float*, ) ; LD1D -declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i64.nxv2i64(, i64*, ) -declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i64.nxv2i64(, i64*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i64(, i64*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i64(, i64*, ) -declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv2f64.nxv2i64(, double*, ) -declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv2f64.nxv2i64(, double*, ) +declare @llvm.aarch64.sve.ld1.gather.sxtw.nxv2f64(, double*, ) +declare @llvm.aarch64.sve.ld1.gather.uxtw.nxv2f64(, double*, )