diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -796,6 +796,12 @@ LLVMPointerTo<0>], [IntrArgMemOnly, NoCapture<2>]>; + class AdvSIMD_SVE_Index_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMVectorElementType<0>, + LLVMVectorElementType<0>], + [IntrNoMem]>; + class AdvSIMD_Merged1VectorArg_Intrinsic : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, @@ -1237,6 +1243,8 @@ def int_aarch64_sve_dup : AdvSIMD_SVE_DUP_Intrinsic; +def int_aarch64_sve_index : AdvSIMD_SVE_Index_Intrinsic; + // // Integer arithmetic // diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -216,6 +216,7 @@ PTRUE, DUP_PRED, + INDEX_VECTOR, LDNF1, LDNF1S, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1426,6 +1426,7 @@ case AArch64ISD::STP: return "AArch64ISD::STP"; case AArch64ISD::STNP: return "AArch64ISD::STNP"; case AArch64ISD::DUP_PRED: return "AArch64ISD::DUP_PRED"; + case AArch64ISD::INDEX_VECTOR: return "AArch64ISD::INDEX_VECTOR"; } return nullptr; } @@ -10918,6 +10919,21 @@ return SDValue(); } +static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) { + SDLoc DL(N); + SDValue Op1 = N->getOperand(1); + SDValue Op2 = N->getOperand(2); + EVT ScalarTy = Op1.getValueType(); + + if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16)) { + Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1); + Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2); + } + + return DAG.getNode(AArch64ISD::INDEX_VECTOR, DL, N->getValueType(0), + Op1, Op2); +} + static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG 
&DAG) { SDLoc dl(N); SDValue Scalar = N->getOperand(3); @@ -11118,6 +11134,8 @@ return LowerSVEIntReduction(N, AArch64ISD::EORV_PRED, DAG); case Intrinsic::aarch64_sve_andv: return LowerSVEIntReduction(N, AArch64ISD::ANDV_PRED, DAG); + case Intrinsic::aarch64_sve_index: + return LowerSVEIntrinsicIndex(N, DAG); case Intrinsic::aarch64_sve_dup: return LowerSVEIntrinsicDUP(N, DAG); case Intrinsic::aarch64_sve_ext: diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -330,6 +330,18 @@ let DecoderMethod = "DecodeSImm<5>"; } +def simm5_8b : Operand<i32>, ImmLeaf<i32, [{ return (int8_t)Imm >= -16 && (int8_t)Imm < 16; }]> { + let ParserMatchClass = SImm5Operand; + let DecoderMethod = "DecodeSImm<5>"; + let PrintMethod = "printSImm<8>"; +} + +def simm5_16b : Operand<i32>, ImmLeaf<i32, [{ return (int16_t)Imm >= -16 && (int16_t)Imm < 16; }]> { + let ParserMatchClass = SImm5Operand; + let DecoderMethod = "DecodeSImm<5>"; + let PrintMethod = "printSImm<16>"; +} + // simm7sN predicate - True if the immediate is a multiple of N in the range // [-64 * N, 63 * N]. 
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -99,6 +99,9 @@ def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisVec<2>, SDTCVecEltisVT<2,i1>]>; def AArch64dup_pred : SDNode<"AArch64ISD::DUP_PRED", SDT_AArch64DUP_PRED>; +def SDT_IndexVector : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisInt<2>]>; +def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>; + let Predicates = [HasSVE] in { defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>; @@ -961,10 +964,10 @@ defm INCP_ZP : sve_int_count_v<0b10000, "incp">; defm DECP_ZP : sve_int_count_v<0b10100, "decp">; - defm INDEX_RR : sve_int_index_rr<"index">; - defm INDEX_IR : sve_int_index_ir<"index">; - defm INDEX_RI : sve_int_index_ri<"index">; - defm INDEX_II : sve_int_index_ii<"index">; + defm INDEX_RR : sve_int_index_rr<"index", index_vector>; + defm INDEX_IR : sve_int_index_ir<"index", index_vector>; + defm INDEX_RI : sve_int_index_ri<"index", index_vector>; + defm INDEX_II : sve_int_index_ii<"index", index_vector>; // Unpredicated shifts defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr">; diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h @@ -56,6 +56,9 @@ raw_ostream &O); void printImmHex(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + template <int Size> + void printSImm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); template <typename T> void printImmSVE(T Value, raw_ostream &O); void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm, raw_ostream &O); diff --git 
a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -900,6 +900,19 @@ O << format("#%#llx", Op.getImm()); } +template <int Size> +void AArch64InstPrinter::printSImm(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Size == 8) + O << "#" << formatImm((signed char)Op.getImm()); + else if (Size == 16) + O << "#" << formatImm((signed short)Op.getImm()); + else + O << "#" << formatImm(Op.getImm()); +} + void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -4386,11 +4386,20 @@ let Inst{4-0} = Zd; } -multiclass sve_int_index_ii<string asm> { - def _B : sve_int_index_ii<0b00, asm, ZPR8, simm5_32b>; - def _H : sve_int_index_ii<0b01, asm, ZPR16, simm5_32b>; +multiclass sve_int_index_ii<string asm, SDPatternOperator op> { + def _B : sve_int_index_ii<0b00, asm, ZPR8, simm5_8b>; + def _H : sve_int_index_ii<0b01, asm, ZPR16, simm5_16b>; def _S : sve_int_index_ii<0b10, asm, ZPR32, simm5_32b>; def _D : sve_int_index_ii<0b11, asm, ZPR64, simm5_64b>; + + def : Pat<(nxv16i8 (op simm5_8b:$imm5, simm5_8b:$imm5b)), + (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, simm5_8b:$imm5b)>; + def : Pat<(nxv8i16 (op simm5_16b:$imm5, simm5_16b:$imm5b)), + (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, simm5_16b:$imm5b)>; + def : Pat<(nxv4i32 (op simm5_32b:$imm5, simm5_32b:$imm5b)), + (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, simm5_32b:$imm5b)>; + def : Pat<(nxv2i64 (op simm5_64b:$imm5, simm5_64b:$imm5b)), + (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, simm5_64b:$imm5b)>; } class sve_int_index_ir<bits<2> sz8_64, 
string asm, ZPRRegOp zprty, @@ -4410,11 +4419,20 @@ let Inst{4-0} = Zd; } -multiclass sve_int_index_ir<string asm> { - def _B : sve_int_index_ir<0b00, asm, ZPR8, GPR32, simm5_32b>; - def _H : sve_int_index_ir<0b01, asm, ZPR16, GPR32, simm5_32b>; +multiclass sve_int_index_ir<string asm, SDPatternOperator op> { + def _B : sve_int_index_ir<0b00, asm, ZPR8, GPR32, simm5_8b>; + def _H : sve_int_index_ir<0b01, asm, ZPR16, GPR32, simm5_16b>; def _S : sve_int_index_ir<0b10, asm, ZPR32, GPR32, simm5_32b>; def _D : sve_int_index_ir<0b11, asm, ZPR64, GPR64, simm5_64b>; + + def : Pat<(nxv16i8 (op simm5_8b:$imm5, GPR32:$Rm)), + (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, GPR32:$Rm)>; + def : Pat<(nxv8i16 (op simm5_16b:$imm5, GPR32:$Rm)), + (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, GPR32:$Rm)>; + def : Pat<(nxv4i32 (op simm5_32b:$imm5, GPR32:$Rm)), + (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>; + def : Pat<(nxv2i64 (op simm5_64b:$imm5, GPR64:$Rm)), + (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>; } class sve_int_index_ri<bits<2> sz8_64, string asm, ZPRRegOp zprty, @@ -4434,11 +4452,20 @@ let Inst{4-0} = Zd; } -multiclass sve_int_index_ri<string asm> { - def _B : sve_int_index_ri<0b00, asm, ZPR8, GPR32, simm5_32b>; - def _H : sve_int_index_ri<0b01, asm, ZPR16, GPR32, simm5_32b>; +multiclass sve_int_index_ri<string asm, SDPatternOperator op> { + def _B : sve_int_index_ri<0b00, asm, ZPR8, GPR32, simm5_8b>; + def _H : sve_int_index_ri<0b01, asm, ZPR16, GPR32, simm5_16b>; def _S : sve_int_index_ri<0b10, asm, ZPR32, GPR32, simm5_32b>; def _D : sve_int_index_ri<0b11, asm, ZPR64, GPR64, simm5_64b>; + + def : Pat<(nxv16i8 (op GPR32:$Rm, simm5_8b:$imm5)), + (!cast<Instruction>(NAME # "_B") GPR32:$Rm, simm5_8b:$imm5)>; + def : Pat<(nxv8i16 (op GPR32:$Rm, simm5_16b:$imm5)), + (!cast<Instruction>(NAME # "_H") GPR32:$Rm, simm5_16b:$imm5)>; + def : Pat<(nxv4i32 (op GPR32:$Rm, simm5_32b:$imm5)), + (!cast<Instruction>(NAME # "_S") GPR32:$Rm, simm5_32b:$imm5)>; + def : Pat<(nxv2i64 (op GPR64:$Rm, simm5_64b:$imm5)), + (!cast<Instruction>(NAME # "_D") GPR64:$Rm, simm5_64b:$imm5)>; } class sve_int_index_rr<bits<2> sz8_64, string asm, ZPRRegOp zprty, @@ -4458,11 
+4485,16 @@ let Inst{4-0} = Zd; } -multiclass sve_int_index_rr<string asm> { +multiclass sve_int_index_rr<string asm, SDPatternOperator op> { def _B : sve_int_index_rr<0b00, asm, ZPR8, GPR32>; def _H : sve_int_index_rr<0b01, asm, ZPR16, GPR32>; def _S : sve_int_index_rr<0b10, asm, ZPR32, GPR32>; def _D : sve_int_index_rr<0b11, asm, ZPR64, GPR64>; + + def : SVE_2_Op_Pat<nxv16i8, op, i32, i32, !cast<Instruction>(NAME # _B)>; + def : SVE_2_Op_Pat<nxv8i16, op, i32, i32, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<nxv4i32, op, i32, i32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv2i64, op, i64, i64, !cast<Instruction>(NAME # _D)>; } // //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll @@ -0,0 +1,178 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; INDEX (IMMEDIATES) +; + +define <vscale x 16 x i8> @index_ii_i8() { +; CHECK-LABEL: index_ii_i8: +; CHECK: index z0.b, #-16, #15 +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8 -16, i8 15) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @index_ii_i16() { +; CHECK-LABEL: index_ii_i16: +; CHECK: index z0.h, #15, #-16 +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 15, i16 -16) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @index_ii_i32() { +; CHECK-LABEL: index_ii_i32: +; CHECK: index z0.s, #-16, #15 +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 -16, i32 15) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @index_ii_i64() { +; CHECK-LABEL: index_ii_i64: +; CHECK: index z0.d, #15, #-16 +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 15, i64 -16) + ret <vscale x 2 x i64> %out +} + +define <vscale x 2 x i64> @index_ii_range() { +; CHECK-LABEL: index_ii_range: +; CHECK: mov w8, #16 +; CHECK-NEXT: mov x9, #-17 +; CHECK-NEXT: index z0.d, x9, x8 +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 -17, i64 16) + ret <vscale x 2 x i64> %out +} + +; +; INDEX (IMMEDIATE, SCALAR) +; + +define <vscale x 16 x i8> @index_ir_i8(i8 %a) { +; CHECK-LABEL: index_ir_i8: +; CHECK: index z0.b, #15, w0 +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8>
@llvm.aarch64.sve.index.nxv16i8(i8 15, i8 %a) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @index_ir_i16(i16 %a) { +; CHECK-LABEL: index_ir_i16: +; CHECK: index z0.h, #-16, w0 +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 -16, i16 %a) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @index_ir_i32(i32 %a) { +; CHECK-LABEL: index_ir_i32: +; CHECK: index z0.s, #15, w0 +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 15, i32 %a) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @index_ir_i64(i64 %a) { +; CHECK-LABEL: index_ir_i64: +; CHECK: index z0.d, #-16, x0 +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 -16, i64 %a) + ret <vscale x 2 x i64> %out +} + +define <vscale x 4 x i32> @index_ir_range(i32 %a) { +; CHECK-LABEL: index_ir_range: +; CHECK: mov w8, #-17 +; CHECK: index z0.s, w8, w0 +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 -17, i32 %a) + ret <vscale x 4 x i32> %out +} + +; +; INDEX (SCALAR, IMMEDIATE) +; + +define <vscale x 16 x i8> @index_ri_i8(i8 %a) { +; CHECK-LABEL: index_ri_i8: +; CHECK: index z0.b, w0, #-16 +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8 %a, i8 -16) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @index_ri_i16(i16 %a) { +; CHECK-LABEL: index_ri_i16: +; CHECK: index z0.h, w0, #15 +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 %a, i16 15) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @index_ri_i32(i32 %a) { +; CHECK-LABEL: index_ri_i32: +; CHECK: index z0.s, w0, #-16 +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 %a, i32 -16) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @index_ri_i64(i64 %a) { +; CHECK-LABEL: index_ri_i64: +; CHECK: index z0.d, x0, #15 +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 %a, i64 15) + ret <vscale x 2 x i64> %out +} + +define <vscale x 8 x i16> @index_ri_range(i16 %a) { +; CHECK-LABEL: index_ri_range: +; CHECK: mov w8, #16 +; CHECK: index z0.h, w0, w8 +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 %a, i16 16) + ret <vscale x 8 x i16> %out +} + +; +; INDEX (SCALARS) +; + +define <vscale x 16 x i8> @index_rr_i8(i8 %a, i8 %b) { +; CHECK-LABEL: index_rr_i8: +; CHECK: index z0.b, w0, w1 +; CHECK-NEXT: ret + 
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8 %a, i8 %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @index_rr_i16(i16 %a, i16 %b) { +; CHECK-LABEL: index_rr_i16: +; CHECK: index z0.h, w0, w1 +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 %a, i16 %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @index_rr_i32(i32 %a, i32 %b) { +; CHECK-LABEL: index_rr_i32: +; CHECK: index z0.s, w0, w1 +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 %a, i32 %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @index_rr_i64(i64 %a, i64 %b) { +; CHECK-LABEL: index_rr_i64: +; CHECK: index z0.d, x0, x1 +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 %a, i64 %b) + ret <vscale x 2 x i64> %out +} + +declare <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8, i8) +declare <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16, i16) +declare <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32, i32) +declare <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64, i64)