Index: llvm/include/llvm/IR/IntrinsicsAArch64.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAArch64.td +++ llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -786,6 +786,21 @@ llvm_i32_ty], [IntrNoMem]>; + class AdvSIMD_Pred2VectorArg_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMMatchType<0>], + [IntrNoMem]>; + + class AdvSIMD_Pred3VectorArg_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>], + [IntrNoMem]>; + class AdvSIMD_SVE_Compare_Intrinsic : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, @@ -813,6 +828,20 @@ llvm_anyvector_ty], [IntrNoMem]>; + class AdvSIMD_SVE_ShiftByImm_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + llvm_i32_ty], + [IntrNoMem]>; + + class AdvSIMD_SVE_ShiftWide_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + llvm_nxv2i64_ty], + [IntrNoMem]>; + class AdvSIMD_SVE_Unpack_Intrinsic : Intrinsic<[llvm_anyvector_ty], [LLVMSubdivide2VectorType<0>], @@ -849,6 +878,12 @@ [LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; + class AdvSIMD_SVE_INSR_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMVectorElementType<0>], + [IntrNoMem]>; + class AdvSIMD_SVE_PUNPKHI_Intrinsic : Intrinsic<[LLVMHalfElementsVectorType<0>], [llvm_anyvector_ty], @@ -894,18 +929,6 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". - -class AdvSIMD_Pred2VectorArg_Intrinsic - : Intrinsic<[llvm_anyvector_ty], - [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; - -class AdvSIMD_Pred3VectorArg_Intrinsic - : Intrinsic<[llvm_anyvector_ty], - [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; - - // // Integer arithmetic // @@ -914,35 +937,29 @@ def int_aarch64_sve_sub : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_subr : AdvSIMD_Pred2VectorArg_Intrinsic; -def int_aarch64_sve_and : AdvSIMD_Pred2VectorArg_Intrinsic; -def int_aarch64_sve_or : AdvSIMD_Pred2VectorArg_Intrinsic; -def int_aarch64_sve_xor : AdvSIMD_Pred2VectorArg_Intrinsic; -def int_aarch64_sve_bic : AdvSIMD_2VectorArg_Intrinsic; -def int_aarch64_sve_bic_pred : AdvSIMD_Pred2VectorArg_Intrinsic; - -def int_aarch64_sve_mul : AdvSIMD_Pred2VectorArg_Intrinsic; -def int_aarch64_sve_smulh : AdvSIMD_Pred2VectorArg_Intrinsic; -def int_aarch64_sve_umulh : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_mul : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_smulh : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_umulh : AdvSIMD_Pred2VectorArg_Intrinsic; -def int_aarch64_sve_sdiv : AdvSIMD_Pred2VectorArg_Intrinsic; -def int_aarch64_sve_udiv : AdvSIMD_Pred2VectorArg_Intrinsic; -def int_aarch64_sve_sdivr : AdvSIMD_Pred2VectorArg_Intrinsic; -def int_aarch64_sve_udivr : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_sdiv : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_udiv : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_sdivr : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_udivr : AdvSIMD_Pred2VectorArg_Intrinsic; -def int_aarch64_sve_smax : AdvSIMD_Pred2VectorArg_Intrinsic; -def int_aarch64_sve_umax : AdvSIMD_Pred2VectorArg_Intrinsic; 
-def int_aarch64_sve_smin : AdvSIMD_Pred2VectorArg_Intrinsic; -def int_aarch64_sve_umin : AdvSIMD_Pred2VectorArg_Intrinsic; -def int_aarch64_sve_sabd : AdvSIMD_Pred2VectorArg_Intrinsic; -def int_aarch64_sve_uabd : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_smax : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_umax : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_smin : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_umin : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_sabd : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_uabd : AdvSIMD_Pred2VectorArg_Intrinsic; -def int_aarch64_sve_mad : AdvSIMD_Pred3VectorArg_Intrinsic; -def int_aarch64_sve_msb : AdvSIMD_Pred3VectorArg_Intrinsic; -def int_aarch64_sve_mla : AdvSIMD_Pred3VectorArg_Intrinsic; -def int_aarch64_sve_mls : AdvSIMD_Pred3VectorArg_Intrinsic; +def int_aarch64_sve_mad : AdvSIMD_Pred3VectorArg_Intrinsic; +def int_aarch64_sve_msb : AdvSIMD_Pred3VectorArg_Intrinsic; +def int_aarch64_sve_mla : AdvSIMD_Pred3VectorArg_Intrinsic; +def int_aarch64_sve_mls : AdvSIMD_Pred3VectorArg_Intrinsic; -def int_aarch64_sve_abs : AdvSIMD_Merged1VectorArg_Intrinsic; -def int_aarch64_sve_neg : AdvSIMD_Merged1VectorArg_Intrinsic; +def int_aarch64_sve_abs : AdvSIMD_Merged1VectorArg_Intrinsic; +def int_aarch64_sve_neg : AdvSIMD_Merged1VectorArg_Intrinsic; def int_aarch64_sve_sdot : AdvSIMD_SVE_DOT_Intrinsic; def int_aarch64_sve_sdot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic; @@ -951,6 +968,29 @@ def int_aarch64_sve_udot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic; // +// Logical operations +// + +def int_aarch64_sve_and : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_bic : AdvSIMD_2VectorArg_Intrinsic; +def int_aarch64_sve_bic_pred : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_cnot : AdvSIMD_Merged1VectorArg_Intrinsic; +def int_aarch64_sve_not : AdvSIMD_Merged1VectorArg_Intrinsic; +def int_aarch64_sve_or : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_xor : AdvSIMD_Pred2VectorArg_Intrinsic; + +// Shifts + +def int_aarch64_sve_asr : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_asr_wide : AdvSIMD_SVE_ShiftWide_Intrinsic; +def int_aarch64_sve_asrd : AdvSIMD_SVE_ShiftByImm_Intrinsic; +def int_aarch64_sve_insr : AdvSIMD_SVE_INSR_Intrinsic; +def int_aarch64_sve_lsl : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_lsl_wide : AdvSIMD_SVE_ShiftWide_Intrinsic; +def int_aarch64_sve_lsr : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_lsr_wide : AdvSIMD_SVE_ShiftWide_Intrinsic; + +// // Counting bits // @@ -969,13 +1009,6 @@ def int_aarch64_sve_uunpklo : AdvSIMD_SVE_Unpack_Intrinsic; // -// Logical operations -// - -def int_aarch64_sve_cnot : AdvSIMD_Merged1VectorArg_Intrinsic; -def int_aarch64_sve_not : AdvSIMD_Merged1VectorArg_Intrinsic; - -// // Conversion // Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -196,6 +196,8 @@ UUNPKHI, UUNPKLO, + INSR, + // NEON Load/Store with post-increment base updates LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE, LD3post, Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -668,6 +668,8 @@ setHasExtractBitsInsn(true); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + 
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); if (Subtarget->hasNEON()) { // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to @@ -1333,6 +1335,7 @@ case AArch64ISD::SUNPKLO: return "AArch64ISD::SUNPKLO"; case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI"; case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO"; + case AArch64ISD::INSR: return "AArch64ISD::INSR"; } return nullptr; } @@ -2884,6 +2887,16 @@ return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(), Op.getOperand(1)); + case Intrinsic::aarch64_sve_insr: { + SDValue Scalar = Op.getOperand(2); + EVT ScalarTy = Scalar.getValueType(); + if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16)) + Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar); + + return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(), + Op.getOperand(1), Scalar); + } + case Intrinsic::localaddress: { const auto &MF = DAG.getMachineFunction(); const auto *RegInfo = Subtarget->getRegisterInfo(); Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -214,6 +214,7 @@ SDTCisSameAs<0, 1>]>; def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>; def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; +def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>; def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>; @@ -396,6 +397,8 @@ def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; +def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>; + def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>; def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>; def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>; Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -199,8 +199,8 @@ defm SPLICE_ZPZ : sve_int_perm_splice<"splice">; defm COMPACT_ZPZ : sve_int_perm_compact<"compact">; - defm INSR_ZR : sve_int_perm_insrs<"insr">; - defm INSR_ZV : sve_int_perm_insrv<"insr">; + defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>; + defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>; def EXT_ZZI : sve_int_perm_extract_i<"ext">; defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit">; @@ -876,18 +876,18 @@ defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0000, "asr">; defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0001, "lsr">; defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">; - defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd">; - - defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr">; - defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr">; - defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl">; - defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr">; - defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr">; - defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr">; - - defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr">; - defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr">; - defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl">; + defm ASRD_ZPmI : 
sve_int_bin_pred_shift_imm_right<0b0100, "asrd", int_aarch64_sve_asrd>;
+
+  defm ASR_ZPmZ  : sve_int_bin_pred_shift<0b000, "asr", int_aarch64_sve_asr>;
+  defm LSR_ZPmZ  : sve_int_bin_pred_shift<0b001, "lsr", int_aarch64_sve_lsr>;
+  defm LSL_ZPmZ  : sve_int_bin_pred_shift<0b011, "lsl", int_aarch64_sve_lsl>;
+  defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", null_frag>;
+  defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", null_frag>;
+  defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", null_frag>;
+
+  defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>;
+  defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>;
+  defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>;
 
   def FCVT_ZPmZ_StoH   : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, ElementSizeS>;
   def FCVT_ZPmZ_HtoS   : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, ElementSizeS>;
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -304,6 +304,12 @@
 : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, vt4:$Op4)),
       (inst $Op1, $Op2, $Op3, $Op4)>;
 
+class SVE_3_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+                       ValueType vt2, ValueType vt3, Operand ImmTy,
+                       Instruction inst>
+: Pat<(vtd (op vt1:$Op1, vt2:$Op2, (vt3 ImmTy:$Op3))),
+      (inst $Op1, $Op2, ImmTy:$Op3)>;
+
 def SVEDup0Undef : ComplexPattern<i64, 0, "SelectDupZeroOrUndef", []>;
 
 //===----------------------------------------------------------------------===//
@@ -888,14 +894,18 @@
 
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = Destructive;
-  let ElementSize = ElementSizeNone;
 }
 
-multiclass sve_int_perm_insrs<string asm> {
+multiclass sve_int_perm_insrs<string asm, SDPatternOperator op> {
   def _B : sve_int_perm_insrs<0b00, asm, ZPR8, GPR32>;
   def _H : sve_int_perm_insrs<0b01, asm, ZPR16, GPR32>;
   def _S : sve_int_perm_insrs<0b10, asm, ZPR32, GPR32>;
   def _D : sve_int_perm_insrs<0b11, asm, ZPR64, GPR64>;
+
+  def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, i32, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, i32, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, i64, !cast<Instruction>(NAME # _D)>;
 }
 
 class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -914,14 +924,17 @@
 
   let Constraints = "$Zdn = $_Zdn";
   let DestructiveInstType = Destructive;
-  let ElementSize = ElementSizeNone;
 }
 
-multiclass sve_int_perm_insrv<string asm> {
+multiclass sve_int_perm_insrv<string asm, SDPatternOperator op> {
  def _B : sve_int_perm_insrv<0b00, asm, ZPR8, FPR8>;
  def _H : sve_int_perm_insrv<0b01, asm, ZPR16, FPR16>;
  def _S : sve_int_perm_insrv<0b10, asm, ZPR32, FPR32>;
  def _D : sve_int_perm_insrv<0b11, asm, ZPR64, FPR64>;
+
+  def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, f64, !cast<Instruction>(NAME # _D)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -3906,7 +3919,8 @@
   }
 }
 
-multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm> {
+multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm,
+                                            SDPatternOperator op = null_frag> {
   def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8,
                                       ElementSizeB>;
   def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16,
@@ -3922,6 +3936,11 @@
     let Inst{22} = imm{5};
     let Inst{9-8} = imm{4-3};
   }
+
+  def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftR8,  !cast<Instruction>(NAME # _B)>;
+  def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1,  nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1,  nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1,  nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
 }
 
 class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
@@ -3948,17 +3967,28 @@
   let ElementSize = zprty.ElementSize;
 }
 
-multiclass sve_int_bin_pred_shift<bits<3> opc, string asm> {
+multiclass sve_int_bin_pred_shift<bits<3> opc, string asm,
+                                  SDPatternOperator op> {
   def _B : sve_int_bin_pred_shift<0b00, 0b0, opc, asm, ZPR8, ZPR8>;
   def _H : sve_int_bin_pred_shift<0b01, 0b0, opc, asm, ZPR16, ZPR16>;
   def _S : sve_int_bin_pred_shift<0b10, 0b0, opc, asm, ZPR32, ZPR32>;
   def _D : sve_int_bin_pred_shift<0b11, 0b0, opc, asm, ZPR64, ZPR64>;
+
+  def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1,  nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1,  nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1,  nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm> {
+multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm,
+                                       SDPatternOperator op> {
   def _B : sve_int_bin_pred_shift<0b00, 0b1, opc, asm, ZPR8, ZPR64>;
   def _H : sve_int_bin_pred_shift<0b01, 0b1, opc, asm, ZPR16, ZPR64>;
   def _S : sve_int_bin_pred_shift<0b10, 0b1, opc, asm, ZPR32, ZPR64>;
+
+  def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv2i64, !cast<Instruction>(NAME # _B)>;
+  def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1,  nxv8i16, nxv2i64, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1,  nxv4i32, nxv2i64, !cast<Instruction>(NAME # _S)>;
 }
 
 //===----------------------------------------------------------------------===//
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
@@ -0,0 +1,367 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; ASR
+;
+
+define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: asr_i8:
+; CHECK: asr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: asr_i16:
+; CHECK: asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: asr_i32:
+; CHECK: asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: asr_i64:
+; CHECK: asr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @asr_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: asr_wide_i8:
+; CHECK: asr z0.b, p0/m, z0.b, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                    <vscale x 16 x i8> %a,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @asr_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: asr_wide_i16:
+; CHECK: asr z0.h, p0/m, z0.h, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                    <vscale x 8 x i16> %a,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @asr_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: asr_wide_i32:
+; CHECK: asr z0.s, p0/m, z0.s, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                    <vscale x 4 x i32> %a,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; ASRD
+;
+
+define <vscale x 16 x i8> @asrd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: asrd_i8:
+; CHECK: asrd z0.b, p0/m, z0.b, #1
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                <vscale x 16 x i8> %a,
+                                                                i32 1)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @asrd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: asrd_i16:
+; CHECK: asrd z0.h, p0/m, z0.h, #2
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x i16> %a,
+                                                                i32 2)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @asrd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: asrd_i32:
+; CHECK: asrd z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %a,
+                                                                i32 31)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @asrd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: asrd_i64:
+; CHECK: asrd z0.d, p0/m, z0.d, #64
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %a,
+                                                                i32 64)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; INSR
+;
+
+define <vscale x 16 x i8> @insr_i8(<vscale x 16 x i8> %a, i8 %b) {
+; CHECK-LABEL: insr_i8:
+; CHECK: insr z0.b, w0
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8> %a, i8 %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @insr_i16(<vscale x 8 x i16> %a, i16 %b) {
+; CHECK-LABEL: insr_i16:
+; CHECK: insr z0.h, w0
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16> %a, i16 %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @insr_i32(<vscale x 4 x i32> %a, i32 %b) {
+; CHECK-LABEL: insr_i32:
+; CHECK: insr z0.s, w0
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32> %a, i32 %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @insr_i64(<vscale x 2 x i64> %a, i64 %b) {
+; CHECK-LABEL: insr_i64:
+; CHECK: insr z0.d, x0
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64> %a, i64 %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 8 x half> @insr_f16(<vscale x 8 x half> %a, half %b) {
+; CHECK-LABEL: insr_f16:
+; CHECK: insr z0.h, h1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half> %a, half %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @insr_f32(<vscale x 4 x float> %a, float %b) {
+; CHECK-LABEL: insr_f32:
+; CHECK: insr z0.s, s1
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float> %a, float %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @insr_f64(<vscale x 2 x double> %a, double %b) {
+; CHECK-LABEL: insr_f64:
+; CHECK: insr z0.d, d1
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double> %a, double %b)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; LSL
+;
+
+define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: lsl_i8:
+; CHECK: lsl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: lsl_i16:
+; CHECK: lsl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: lsl_i32:
+; CHECK: lsl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsl_i64:
+; CHECK: lsl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @lsl_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsl_wide_i8:
+; CHECK: lsl z0.b, p0/m, z0.b, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                    <vscale x 16 x i8> %a,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @lsl_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsl_wide_i16:
+; CHECK: lsl z0.h, p0/m, z0.h, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                    <vscale x 8 x i16> %a,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @lsl_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsl_wide_i32:
+; CHECK: lsl z0.s, p0/m, z0.s, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                    <vscale x 4 x i32> %a,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; LSR
+;
+
+define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: lsr_i8:
+; CHECK: lsr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: lsr_i16:
+; CHECK: lsr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: lsr_i32:
+; CHECK: lsr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsr_i64:
+; CHECK: lsr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @lsr_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsr_wide_i8:
+; CHECK: lsr z0.b, p0/m, z0.b, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                    <vscale x 16 x i8> %a,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @lsr_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsr_wide_i16:
+; CHECK: lsr z0.h, p0/m, z0.h, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                    <vscale x 8 x i16> %a,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @lsr_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsr_wide_i32:
+; CHECK: lsr z0.s, p0/m, z0.s, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                    <vscale x 4 x i32> %a,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8>, i8)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16>, i16)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64>, i64)
+declare <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half>, half)
+declare <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float>, float)
+declare <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double>, double)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
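
For reviewers' context, a minimal C-level sketch of how these intrinsics are expected to be reached from user code. This is an assumption on my part: the builtin names and signatures below follow the Arm C Language Extensions (ACLE) for SVE, and the clang lowering that would map them onto the new llvm.aarch64.sve.* intrinsics is not part of this patch.

  #include <arm_sve.h>

  // Predicated shifts: pg selects the active lanes, mirroring the
  // AdvSIMD_Pred2VectorArg_Intrinsic signature (predicate, Zdn, Zm).
  svint32_t shift_right(svbool_t pg, svint32_t a, svuint32_t amt) {
    return svasr_s32_m(pg, a, amt);       // expected: llvm.aarch64.sve.asr.nxv4i32
  }

  // Wide shifts take a 64-bit element shift vector for any element size,
  // matching the llvm_nxv2i64_ty operand of AdvSIMD_SVE_ShiftWide_Intrinsic.
  svint8_t shift_left_wide(svbool_t pg, svint8_t a, svuint64_t amt) {
    return svlsl_wide_s8_m(pg, a, amt);   // expected: llvm.aarch64.sve.lsl.wide.nxv16i8
  }

  // ASRD: arithmetic shift right for signed division by a power of two; the
  // shift amount is an immediate (AdvSIMD_SVE_ShiftByImm_Intrinsic, i32 operand).
  svint16_t div_by_4(svbool_t pg, svint16_t a) {
    return svasrd_n_s16_m(pg, a, 2);      // expected: llvm.aarch64.sve.asrd.nxv8i16
  }

  // INSR: shift the vector up by one element and insert a scalar into lane 0
  // (i8/i16 scalars are any-extended to i32 in LowerINTRINSIC_WO_CHAIN above).
  svfloat32_t insert(svfloat32_t v, float x) {
    return svinsr_n_f32(v, x);            // expected: llvm.aarch64.sve.insr.nxv4f32
  }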