Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -59,6 +59,9 @@ UMIN_PRED, SMAX_PRED, UMAX_PRED, + SHL_PRED, + SRL_PRED, + SRA_PRED, // Arithmetic instructions which write flags. ADDS, Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -887,6 +887,9 @@ setOperationAction(ISD::UMIN, VT, Custom); setOperationAction(ISD::SMAX, VT, Custom); setOperationAction(ISD::UMAX, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); } } setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); @@ -1289,6 +1292,9 @@ case AArch64ISD::UMIN_PRED: return "AArch64ISD::UMIN_PRED"; case AArch64ISD::SMAX_PRED: return "AArch64ISD::SMAX_PRED"; case AArch64ISD::UMAX_PRED: return "AArch64ISD::UMAX_PRED"; + case AArch64ISD::SHL_PRED: return "AArch64ISD::SHL_PRED"; + case AArch64ISD::SRL_PRED: return "AArch64ISD::SRL_PRED"; + case AArch64ISD::SRA_PRED: return "AArch64ISD::SRA_PRED"; case AArch64ISD::ADC: return "AArch64ISD::ADC"; case AArch64ISD::SBC: return "AArch64ISD::SBC"; case AArch64ISD::ADDS: return "AArch64ISD::ADDS"; @@ -8597,6 +8603,9 @@ llvm_unreachable("unexpected shift opcode"); case ISD::SHL: + if (VT.isScalableVector()) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED); + if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0), DAG.getConstant(Cnt, DL, MVT::i32)); @@ -8606,6 +8615,12 @@ Op.getOperand(0), Op.getOperand(1)); case ISD::SRA: case ISD::SRL: + if (VT.isScalableVector()) { + unsigned Opc = Op.getOpcode() == ISD::SRA ? 
AArch64ISD::SRA_PRED + : AArch64ISD::SRL_PRED; + return LowerToPredicatedOp(Op, DAG, Opc); + } + // Right shift immediate if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) { unsigned Opc = @@ -11461,6 +11476,15 @@ case Intrinsic::aarch64_sve_umax: return DAG.getNode(AArch64ISD::UMAX_PRED, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); + case Intrinsic::aarch64_sve_lsl: + return DAG.getNode(AArch64ISD::SHL_PRED, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), N->getOperand(3)); + case Intrinsic::aarch64_sve_lsr: + return DAG.getNode(AArch64ISD::SRL_PRED, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), N->getOperand(3)); + case Intrinsic::aarch64_sve_asr: + return DAG.getNode(AArch64ISD::SRA_PRED, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_fadda: return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG); case Intrinsic::aarch64_sve_faddv: Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// def SVE8BitLslImm : ComplexPattern; -def SVELShiftImm64 : ComplexPattern", []>; // Contiguous loads - node definitions // @@ -154,12 +153,15 @@ SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3> ]>; -def AArch64sdiv_pred : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>; -def AArch64udiv_pred : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>; -def AArch64smin_pred : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>; -def AArch64umin_pred : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>; -def AArch64smax_pred : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>; -def AArch64umax_pred : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>; +def AArch64sdiv_pred : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>; +def AArch64udiv_pred : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>; +def AArch64smin_pred : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>; +def AArch64umin_pred : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>; +def AArch64smax_pred : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>; +def AArch64umax_pred : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>; +def AArch64lsl_pred : SDNode<"AArch64ISD::SHL_PRED", SDT_AArch64Arith>; +def AArch64lsr_pred : SDNode<"AArch64ISD::SRL_PRED", SDT_AArch64Arith>; +def AArch64asr_pred : SDNode<"AArch64ISD::SRA_PRED", SDT_AArch64Arith>; def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>; def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; @@ -1158,23 +1160,9 @@ defm INDEX_II : sve_int_index_ii<"index", index_vector>; // Unpredicated shifts - defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", sra>; - defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", srl>; - defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", shl>; - - // Patterns for unpredicated left shift by immediate - def : Pat<(nxv16i8 (shl (nxv16i8 ZPR:$Zs1), - (nxv16i8 (AArch64dup (vecshiftL8:$imm))))), - (LSL_ZZI_B ZPR:$Zs1, vecshiftL8:$imm)>; - def : Pat<(nxv8i16 (shl (nxv8i16 ZPR:$Zs1), - (nxv8i16 (AArch64dup (vecshiftL16:$imm))))), - (LSL_ZZI_H ZPR:$Zs1, vecshiftL16:$imm)>; - def : Pat<(nxv4i32 (shl (nxv4i32 ZPR:$Zs1), - (nxv4i32 (AArch64dup (vecshiftL32:$imm))))), - 
(LSL_ZZI_S ZPR:$Zs1, vecshiftL32:$imm)>; - def : Pat<(nxv2i64 (shl (nxv2i64 ZPR:$Zs1), - (nxv2i64 (AArch64dup (i64 (SVELShiftImm64 i32:$imm)))))), - (LSL_ZZI_D ZPR:$Zs1, vecshiftL64:$imm)>; + defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_pred>; + defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_pred>; + defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_pred>; defm ASR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b00, "asr">; defm LSR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b01, "lsr">; @@ -1186,14 +1174,14 @@ defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">; defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>; - defm ASR_ZPZZ : sve_int_bin_pred_zx; - defm LSR_ZPZZ : sve_int_bin_pred_zx; - defm LSL_ZPZZ : sve_int_bin_pred_zx; + defm ASR_ZPZZ : sve_int_bin_pred_zx; + defm LSR_ZPZZ : sve_int_bin_pred_zx; + defm LSL_ZPZZ : sve_int_bin_pred_zx; defm ASRD_ZPZI : sve_int_bin_pred_shift_0_right_zx; - defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ", 1>; - defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ", 1>; - defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ", 1>; + defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", AArch64asr_pred, "ASRR_ZPmZ", 1>; + defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", AArch64lsr_pred, "LSRR_ZPmZ", 1>; + defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", AArch64lsl_pred, "LSLR_ZPmZ", 1>; defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", "ASRR_ZPZZ", null_frag, "ASR_ZPmZ", 0>; defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", "LSRR_ZPZZ", null_frag, "LSR_ZPmZ", 0>; defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", "LSLR_ZPZZ", null_frag, "LSL_ZPmZ", 0>; Index: llvm/lib/Target/AArch64/SVEInstrFormats.td =================================================================== --- llvm/lib/Target/AArch64/SVEInstrFormats.td +++ llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -215,6 +215,8 @@ def SVEArithUImmPat : ComplexPattern; def SVEArithSImmPat : ComplexPattern; +def SVEShiftImm64 : ComplexPattern", []>; + class SVEExactFPImm : AsmOperandClass { let Name = "SVEExactFPImmOperand" # Suffix; let DiagnosticType = "Invalid" # Name; @@ -324,6 +326,11 @@ : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm)))))), (inst $Op1, i32:$imm)>; +class SVE_1_Op_Imm_Shift_Pred_Pat + : Pat<(vt (op (pt (AArch64ptrue 31)), (vt zprty:$Op1), (vt (AArch64dup (ImmTy:$imm))))), + (inst $Op1, ImmTy:$imm)>; + class SVE_1_Op_Imm_Arith_Pred_Pat : Pat<(vt (op (pt (AArch64ptrue 31)), (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm)))))), @@ -4947,12 +4954,11 @@ } class sve_int_bin_cons_shift_imm tsz8_64, bits<2> opc, string asm, - ZPRRegOp zprty, Operand immtype, ValueType vt, - SDPatternOperator op> + ZPRRegOp zprty, Operand immtype> : I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm), asm, "\t$Zd, $Zn, $imm", "", - [(set (vt zprty:$Zd), (op (vt zprty:$Zn), immtype:$imm))]>, Sched<[]> { + []>, Sched<[]> { bits<5> Zd; bits<5> Zn; bits<6> imm; @@ -4968,33 +4974,43 @@ } multiclass sve_int_bin_cons_shift_imm_left opc, string asm, - SDPatternOperator op> { - def _B : sve_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8, nxv16i8, op>; - def _H : sve_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16, nxv8i16, op> { + SDPatternOperator op> { + 
def _B : sve_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; + def _H : sve_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { let Inst{19} = imm{3}; } - def _S : sve_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32, nxv4i32, op> { + def _S : sve_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { let Inst{20-19} = imm{4-3}; } - def _D : sve_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64, nxv2i64, op> { + def _D : sve_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } + + def : SVE_1_Op_Imm_Shift_Pred_Pat(NAME # _B)>; + def : SVE_1_Op_Imm_Shift_Pred_Pat(NAME # _H)>; + def : SVE_1_Op_Imm_Shift_Pred_Pat(NAME # _S)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _D)>; } multiclass sve_int_bin_cons_shift_imm_right opc, string asm, - SDPatternOperator op> { - def _B : sve_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8, nxv16i8, op>; - def _H : sve_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16, nxv8i16, op> { + SDPatternOperator op> { + def _B : sve_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; + def _H : sve_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { let Inst{19} = imm{3}; } - def _S : sve_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32, nxv4i32, op> { + def _S : sve_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { let Inst{20-19} = imm{4-3}; } - def _D : sve_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64, nxv2i64, op> { + def _D : sve_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } + + def : SVE_1_Op_Imm_Shift_Pred_Pat(NAME # _B)>; + def : SVE_1_Op_Imm_Shift_Pred_Pat(NAME # _H)>; + def : SVE_1_Op_Imm_Shift_Pred_Pat(NAME # _S)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _D)>; } //===----------------------------------------------------------------------===// // SVE Memory - Store Group Index: llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll =================================================================== --- llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll +++ llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll @@ -426,3 +426,183 @@ %max = select %cmp, %a, %b ret %max } + +; +; ASR +; + +define @asr_i8( %a, %b){ +; CHECK-LABEL: @asr_i8 +; CHECK-DAG: ptrue p0.b +; CHECK-DAG: asr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %shr = ashr %a, %b + ret %shr +} + +define @asr_i16( %a, %b){ +; CHECK-LABEL: @asr_i16 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: asr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %shr = ashr %a, %b + ret %shr +} + +define @asr_i32( %a, %b){ +; CHECK-LABEL: @asr_i32 +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: asr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %shr = ashr %a, %b + ret %shr +} + +define @asr_i64( %a, %b){ +; CHECK-LABEL: @asr_i64 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: asr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %shr = ashr %a, %b + ret %shr +} + +define @asr_split_i16( %a, %b){ +; CHECK-LABEL: @asr_split_i16 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: asr z0.h, p0/m, z0.h, z2.h +; CHECK-DAG: asr z1.h, p0/m, z1.h, z3.h +; CHECK-NEXT: ret + %shr = ashr %a, %b + ret %shr +} + +define @asr_promote_i32( %a, %b){ +; CHECK-LABEL: @asr_promote_i32 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: and z1.d, z1.d, #0xffffffff +; CHECK-DAG: asr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %shr = ashr %a, %b + ret %shr +} + +; +; LSL +; + +define @lsl_i8( %a, %b){ +; 
CHECK-LABEL: @lsl_i8 +; CHECK-DAG: ptrue p0.b +; CHECK-DAG: lsl z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %shl = shl %a, %b + ret %shl +} + +define @lsl_i16( %a, %b){ +; CHECK-LABEL: @lsl_i16 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: lsl z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %shl = shl %a, %b + ret %shl +} + +define @lsl_i32( %a, %b){ +; CHECK-LABEL: @lsl_i32 +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: lsl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %shl = shl %a, %b + ret %shl +} + +define @lsl_i64( %a, %b){ +; CHECK-LABEL: @lsl_i64 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: lsl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %shl = shl %a, %b + ret %shl +} + +define @lsl_split_i64( %a, %b){ +; CHECK-LABEL: @lsl_split_i64 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: lsl z0.d, p0/m, z0.d, z2.d +; CHECK-DAG: lsl z1.d, p0/m, z1.d, z3.d +; CHECK-NEXT: ret + %shl = shl %a, %b + ret %shl +} + +define @lsl_promote_i16( %a, %b){ +; CHECK-LABEL: @lsl_promote_i16 +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: and z1.s, z1.s, #0xffff +; CHECK-DAG: lsl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %shl = shl %a, %b + ret %shl +} + +; +; LSR +; + +define @lsr_i8( %a, %b){ +; CHECK-LABEL: @lsr_i8 +; CHECK-DAG: ptrue p0.b +; CHECK-DAG: lsr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %shr = lshr %a, %b + ret %shr +} + +define @lsr_i16( %a, %b){ +; CHECK-LABEL: @lsr_i16 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: lsr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %shr = lshr %a, %b + ret %shr +} + +define @lsr_i32( %a, %b){ +; CHECK-LABEL: @lsr_i32 +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: lsr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %shr = lshr %a, %b + ret %shr +} + +define @lsr_i64( %a, %b){ +; CHECK-LABEL: @lsr_i64 +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: lsr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %shr = lshr %a, %b + ret %shr +} + +define @lsr_promote_i8( %a, %b){ +; CHECK-LABEL: @lsr_promote_i8 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: and z1.h, z1.h, #0xff +; CHECK-DAG: lsr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %shr = lshr %a, %b + ret %shr +} + +define @lsr_split_i32( %a, %b){ +; CHECK-LABEL: @lsr_split_i32 +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: lsr z0.s, p0/m, z0.s, z2.s +; CHECK-DAG: lsr z1.s, p0/m, z1.s, z3.s +; CHECK-NEXT: ret + %shr = lshr %a, %b + ret %shr +} Index: llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll +++ llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll @@ -482,3 +482,129 @@ %res = mul %a, %splat ret %res } + +; ASR + +define @asr_i8( %a){ +; CHECK-LABEL: @asr_i8 +; CHECK-DAG: asr z0.b, z0.b, #8 +; CHECK-NEXT: ret + %elt = insertelement undef, i8 8, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %lshr = ashr %a, %splat + ret %lshr +} + +define @asr_i16( %a){ +; CHECK-LABEL: @asr_i16 +; CHECK-DAG: asr z0.h, z0.h, #16 +; CHECK-NEXT: ret + %elt = insertelement undef, i16 16, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %ashr = ashr %a, %splat + ret %ashr +} + +define @asr_i32( %a){ +; CHECK-LABEL: @asr_i32 +; CHECK-DAG: asr z0.s, z0.s, #32 +; CHECK-NEXT: ret + %elt = insertelement undef, i32 32, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %ashr = ashr %a, %splat + ret %ashr +} + +define @asr_i64( %a){ +; CHECK-LABEL: @asr_i64 +; CHECK-DAG: asr z0.d, z0.d, #64 +; CHECK-NEXT: ret + %elt = insertelement undef, i64 64, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %ashr = ashr %a, %splat + ret %ashr +} + +; LSL + +define @lsl_i8( 
%a){ +; CHECK-LABEL: @lsl_i8 +; CHECK-DAG: lsl z0.b, z0.b, #7 +; CHECK-NEXT: ret + %elt = insertelement undef, i8 7, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %shl = shl %a, %splat + ret %shl +} + +define @lsl_i16( %a){ +; CHECK-LABEL: @lsl_i16 +; CHECK-DAG: lsl z0.h, z0.h, #15 +; CHECK-NEXT: ret + %elt = insertelement undef, i16 15, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %shl = shl %a, %splat + ret %shl +} + +define @lsl_i32( %a){ +; CHECK-LABEL: @lsl_i32 +; CHECK-DAG: lsl z0.s, z0.s, #31 +; CHECK-NEXT: ret + %elt = insertelement undef, i32 31, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %shl = shl %a, %splat + ret %shl +} + +define @lsl_i64( %a){ +; CHECK-LABEL: @lsl_i64 +; CHECK-DAG: lsl z0.d, z0.d, #63 +; CHECK-NEXT: ret + %elt = insertelement undef, i64 63, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %shl = shl %a, %splat + ret %shl +} + +; LSR + +define @lsr_i8( %a){ +; CHECK-LABEL: @lsr_i8 +; CHECK-DAG: lsr z0.b, z0.b, #8 +; CHECK-NEXT: ret + %elt = insertelement undef, i8 8, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %lshr = lshr %a, %splat + ret %lshr +} + +define @lsr_i16( %a){ +; CHECK-LABEL: @lsr_i16 +; CHECK-DAG: lsr z0.h, z0.h, #16 +; CHECK-NEXT: ret + %elt = insertelement undef, i16 16, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %lshr = lshr %a, %splat + ret %lshr +} + +define @lsr_i32( %a){ +; CHECK-LABEL: @lsr_i32 +; CHECK-DAG: lsr z0.s, z0.s, #32 +; CHECK-NEXT: ret + %elt = insertelement undef, i32 32, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %lshr = lshr %a, %splat + ret %lshr +} + +define @lsr_i64( %a){ +; CHECK-LABEL: @lsr_i64 +; CHECK-DAG: lsr z0.d, z0.d, #64 +; CHECK-NEXT: ret + %elt = insertelement undef, i64 64, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %lshr = lshr %a, %splat + ret %lshr +} Index: llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll +++ llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll @@ -533,6 +533,168 @@ ret %out } +; ASR + +define @asr_i8( %a) { +; CHECK-LABEL: asr_i8: +; CHECK: asr z0.b, z0.b, #8 +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %elt = insertelement undef, i8 8, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.asr.nxv16i8( %pg, + %a, + %splat) + ret %out +} + +define @asr_i16( %a) { +; CHECK-LABEL: asr_i16: +; CHECK: asr z0.h, z0.h, #16 +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %elt = insertelement undef, i16 16, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.asr.nxv8i16( %pg, + %a, + %splat) + ret %out +} + +define @asr_i32( %a) { +; CHECK-LABEL: asr_i32: +; CHECK: asr z0.s, z0.s, #32 +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %elt = insertelement undef, i32 32, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.asr.nxv4i32( %pg, + %a, + %splat) + ret %out +} + +define @asr_i64( %a) { +; CHECK-LABEL: asr_i64: +; CHECK: asr z0.d, z0.d, #64 +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %elt = insertelement undef, i64 64, i64 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.asr.nxv2i64( %pg, + %a, + %splat) + ret %out +} + +; LSL + +define @lsl_i8( %a) { +; 
CHECK-LABEL: lsl_i8: +; CHECK: lsl z0.b, z0.b, #7 +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %elt = insertelement undef, i8 7, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv16i8( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i16( %a) { +; CHECK-LABEL: lsl_i16: +; CHECK: lsl z0.h, z0.h, #15 +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %elt = insertelement undef, i16 15, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv8i16( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i32( %a) { +; CHECK-LABEL: lsl_i32: +; CHECK: lsl z0.s, z0.s, #31 +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %elt = insertelement undef, i32 31, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv4i32( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i64( %a) { +; CHECK-LABEL: lsl_i64: +; CHECK: lsl z0.d, z0.d, #63 +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %elt = insertelement undef, i64 63, i64 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv2i64( %pg, + %a, + %splat) + ret %out +} + +; LSR + +define @lsr_i8( %a) { +; CHECK-LABEL: lsr_i8: +; CHECK: lsr z0.b, z0.b, #8 +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %elt = insertelement undef, i8 8, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsr.nxv16i8( %pg, + %a, + %splat) + ret %out +} + +define @lsr_i16( %a) { +; CHECK-LABEL: lsr_i16: +; CHECK: lsr z0.h, z0.h, #16 +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %elt = insertelement undef, i16 16, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsr.nxv8i16( %pg, + %a, + %splat) + ret %out +} + +define @lsr_i32( %a) { +; CHECK-LABEL: lsr_i32: +; CHECK: lsr z0.s, z0.s, #32 +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %elt = insertelement undef, i32 32, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsr.nxv4i32( %pg, + %a, + %splat) + ret %out +} + +define @lsr_i64( %a) { +; CHECK-LABEL: lsr_i64: +; CHECK: lsr z0.d, z0.d, #64 +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %elt = insertelement undef, i64 64, i64 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsr.nxv2i64( %pg, + %a, + %splat) + ret %out +} + declare @llvm.aarch64.sve.sqadd.x.nxv16i8(, ) declare @llvm.aarch64.sve.sqadd.x.nxv8i16(, ) declare @llvm.aarch64.sve.sqadd.x.nxv4i32(, ) @@ -573,6 +735,21 @@ declare @llvm.aarch64.sve.umin.nxv4i32(, , ) declare @llvm.aarch64.sve.umin.nxv2i64(, , ) +declare @llvm.aarch64.sve.asr.nxv16i8(, , ) +declare @llvm.aarch64.sve.asr.nxv8i16(, , ) +declare @llvm.aarch64.sve.asr.nxv4i32(, , ) +declare @llvm.aarch64.sve.asr.nxv2i64(, , ) + +declare @llvm.aarch64.sve.lsl.nxv16i8(, , ) +declare @llvm.aarch64.sve.lsl.nxv8i16(, , ) +declare @llvm.aarch64.sve.lsl.nxv4i32(, , ) +declare @llvm.aarch64.sve.lsl.nxv2i64(, , ) + +declare @llvm.aarch64.sve.lsr.nxv16i8(, , ) +declare @llvm.aarch64.sve.lsr.nxv8i16(, , ) +declare @llvm.aarch64.sve.lsr.nxv4i32(, , ) +declare @llvm.aarch64.sve.lsr.nxv2i64(, , ) + declare @llvm.aarch64.sve.ptrue.nxv16i1(i32 %pattern) declare @llvm.aarch64.sve.ptrue.nxv8i1(i32 %pattern) declare 
<vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 %pattern)
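
Note on the lowering above: the new SHL_PRED/SRL_PRED/SRA_PRED nodes carry a governing predicate as their first operand, and when a plain IR shl/lshr/ashr on a scalable type goes through LowerToPredicatedOp that predicate is an all-active ptrue, which is why the CHECK lines expect a ptrue followed by the merging (p0/m) form of asr/lsr/lsl. The following is only a rough lane-wise reference model of that behaviour, a standalone sketch under those assumptions and not LLVM code; the helper name, the fixed 8-lane width and the int8_t element type are illustrative only.

#include <array>
#include <cstdint>
#include <cstdio>

enum class ShiftOp { Shl, Srl, Sra };

// Lane-wise model of a predicated shift: active lanes take the shifted value,
// inactive lanes keep the first source operand (the p0/m merging behaviour).
template <std::size_t N>
std::array<int8_t, N> predicatedShift(ShiftOp op,
                                      const std::array<bool, N> &pred,
                                      std::array<int8_t, N> acc,
                                      const std::array<uint8_t, N> &amt) {
  for (std::size_t i = 0; i < N; ++i) {
    if (!pred[i])
      continue;               // inactive lane: acc[i] is left unchanged
    uint8_t s = amt[i] & 7;   // keep the model's shift amounts in range for int8_t
    switch (op) {
    case ShiftOp::Shl:
      acc[i] = static_cast<int8_t>(static_cast<uint8_t>(acc[i]) << s);
      break;
    case ShiftOp::Srl:
      acc[i] = static_cast<int8_t>(static_cast<uint8_t>(acc[i]) >> s);
      break;
    case ShiftOp::Sra:
      acc[i] = static_cast<int8_t>(acc[i] >> s); // promoted to int; arithmetic shift
      break;
    }
  }
  return acc;
}

int main() {
  std::array<bool, 8> allActive;
  allActive.fill(true);       // stands in for the ptrue p0.b the lowering emits
  std::array<int8_t, 8> z0{-128, -64, -3, -1, 1, 3, 64, 127};
  std::array<uint8_t, 8> z1{1, 2, 1, 1, 1, 1, 1, 1};
  std::array<int8_t, 8> r = predicatedShift(ShiftOp::Sra, allActive, z0, z1);
  for (int8_t v : r)
    std::printf("%d ", v);    // prints: -64 -16 -2 -1 0 1 32 63
  std::printf("\n");
  return 0;
}

With an all-active predicate the result is identical to the unpredicated IR shift, which is what makes the conversion in LowerToPredicatedOp legal; only the destructive/merging encoding of the SVE instruction requires the predicate operand at all.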