diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -412,6 +412,7 @@
     }
     LLVM_FALLTHROUGH;
   case AArch64::DestructiveBinary:
+  case AArch64::DestructiveBinaryImm:
     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
     break;
   default:
@@ -430,6 +431,9 @@
                      DstReg != MI.getOperand(DOPIdx).getReg() ||
                      MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
     break;
+  case AArch64::DestructiveBinaryImm:
+    DOPRegIsUnique = true;
+    break;
   }

   assert (DOPRegIsUnique && "The destructive operand should be unique");
@@ -498,6 +502,7 @@
     .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));

   switch (DType) {
+  case AArch64::DestructiveBinaryImm:
   case AArch64::DestructiveBinaryComm:
   case AArch64::DestructiveBinaryCommWithRev:
     DOP.add(MI.getOperand(PredIdx))
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1131,17 +1131,22 @@
   defm LSL_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b11, "lsl">;

   // Predicated shifts
-  defm ASR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b0000, "asr">;
-  defm LSR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b0001, "lsr">;
+  defm ASR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b0000, "asr", "ASR_ZPZI">;
+  defm LSR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b0001, "lsr", "LSR_ZPZI">;
   defm LSL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">;
-  defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", int_aarch64_sve_asrd>;
+  defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;

-  defm ASR_ZPmZ  : sve_int_bin_pred_shift<0b000, "asr", int_aarch64_sve_asr>;
-  defm LSR_ZPmZ  : sve_int_bin_pred_shift<0b001, "lsr", int_aarch64_sve_lsr>;
-  defm LSL_ZPmZ  : sve_int_bin_pred_shift<0b011, "lsl", int_aarch64_sve_lsl>;
-  defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", null_frag>;
-  defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", null_frag>;
-  defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", null_frag>;
+  defm ASR_ZPZZ  : sve_int_bin_pred_zx<int_aarch64_sve_asr>;
+  defm LSR_ZPZZ  : sve_int_bin_pred_zx<int_aarch64_sve_lsr>;
+  defm LSL_ZPZZ  : sve_int_bin_pred_zx<int_aarch64_sve_lsl>;
+  defm ASRD_ZPZI : sve_int_bin_pred_shift_0_right_zx<int_aarch64_sve_asrd>;
+
+  defm ASR_ZPmZ  : sve_int_bin_pred_shift<0b000, "asr",  "ASR_ZPZZ",  int_aarch64_sve_asr,  "ASRR_ZPmZ", 1>;
+  defm LSR_ZPmZ  : sve_int_bin_pred_shift<0b001, "lsr",  "LSR_ZPZZ",  int_aarch64_sve_lsr,  "LSRR_ZPmZ", 1>;
+  defm LSL_ZPmZ  : sve_int_bin_pred_shift<0b011, "lsl",  "LSL_ZPZZ",  int_aarch64_sve_lsl,  "LSLR_ZPmZ", 1>;
+  defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", "ASRR_ZPZZ", null_frag, "ASR_ZPmZ", 0>;
+  defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", "LSRR_ZPZZ", null_frag, "LSR_ZPmZ", 0>;
+  defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", "LSLR_ZPZZ", null_frag, "LSL_ZPmZ", 0>;

   defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>;
   defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>;
@@ -1777,10 +1782,10 @@
   defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag>;

   // SVE2 predicated shifts
-  defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
-  defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
-  defm SRSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", int_aarch64_sve_srshr>;
-  defm URSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", int_aarch64_sve_urshr>;
+  defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl", "SQSHL_ZPZI">;
+  defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl", "UQSHL_ZPZI">;
+  defm SRSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", "SRSHR_ZPZI", int_aarch64_sve_srshr>;
+  defm URSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", "URSHR_ZPZI", int_aarch64_sve_urshr>;
   defm SQSHLU_ZPmI : sve2_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", int_aarch64_sve_sqshlu>;

   // SVE2 integer add/subtract long
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -375,6 +375,12 @@
                    ValueType vt2, ValueType vt3, Instruction inst>
: Pat<(vtd (vtd (op vt1:$Op1, (vselect vt1:$Op1, vt2:$Op2, (SVEDup0)), vt3:$Op3))),
      (inst $Op1, $Op2, $Op3)>;
+
+class SVE_3_Op_Pat_Shift_Imm_SelZero<ValueType vtd, SDPatternOperator op,
+                                     ValueType vt1, ValueType vt2,
+                                     Operand vt3, Instruction inst>
+: Pat<(vtd (op vt1:$Op1, (vselect vt1:$Op1, vt2:$Op2, (SVEDup0)), (i32 (vt3:$Op3)))),
+      (inst $Op1, $Op2, vt3:$Op3)>;
}

//
@@ -433,6 +439,13 @@
     Pseudo<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zs1, zprty:$Zs2), []> {
     let FalseLanes = flags;
   }
+
+  class PredTwoOpImmPseudo<string name, ZPRRegOp zprty, Operand immty,
+                           FalseLanesEnum flags = FalseLanesNone>
+  : SVEPseudo2Instr<name, 0>,
+    Pseudo<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zs1, immty:$imm), []> {
+    let FalseLanes = flags;
+  }
}

//===----------------------------------------------------------------------===//
@@ -4692,19 +4705,23 @@
   let Inst{4-0}   = Zdn;

   let Constraints = "$Zdn = $_Zdn";
-  let DestructiveInstType = DestructiveOther;
+  let DestructiveInstType = DestructiveBinaryImm;
   let ElementSize = zprty.ElementSize;
 }

-multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm> {
-  def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
-  def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
+multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string psName=""> {
+  def _B : SVEPseudo2Instr<psName # _B, 1>,
+           sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
+  def _H : SVEPseudo2Instr<psName # _H, 1>,
+           sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
     let Inst{8} = imm{3};
   }
-  def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
+  def _S : SVEPseudo2Instr<psName # _S, 1>,
+           sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
     let Inst{9-8} = imm{4-3};
   }
-  def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
+  def _D : SVEPseudo2Instr<psName # _D, 1>,
+           sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
     let Inst{22}  = imm{5};
     let Inst{9-8} = imm{4-3};
   }
@@ -4730,16 +4747,20 @@
   def : SVE_3_Op_Imm_Pat(NAME # _D)>;
 }

-multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm,
+multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
                                             SDPatternOperator op = null_frag> {
-  def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
-  def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
+  def _B : SVEPseudo2Instr<Ps # _B, 1>,
+           sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
+  def _H : SVEPseudo2Instr<Ps # _H, 1>,
+           sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
     let Inst{8} = imm{3};
   }
-  def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
+  def _S : SVEPseudo2Instr<Ps # _S, 1>,
+           sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
     let Inst{9-8} = imm{4-3};
   }
-  def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
+  def _D : SVEPseudo2Instr<Ps # _D, 1>,
+           sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
     let Inst{22}  = imm{5};
     let Inst{9-8} = imm{4-3};
   }
@@ -4750,6 +4771,18 @@
   def : SVE_3_Op_Imm_Pat(NAME # _D)>;
 }

+multiclass sve_int_bin_pred_shift_0_right_zx<SDPatternOperator op = null_frag> {
+  def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8,  vecshiftR8,  FalseLanesZero>;
+  def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, vecshiftR16, FalseLanesZero>;
+  def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftR32, FalseLanesZero>;
+  def _ZERO_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftR64, FalseLanesZero>;
+
+  def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, vecshiftR8,  !cast<Pseudo>(NAME # _ZERO_B)>;
+  def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1,  nxv8i16, vecshiftR16, !cast<Pseudo>(NAME # _ZERO_H)>;
+  def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1,  nxv4i32, vecshiftR32, !cast<Pseudo>(NAME # _ZERO_S)>;
+  def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1,  nxv2i64, vecshiftR64, !cast<Pseudo>(NAME # _ZERO_D)>;
+}
+
 class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc, string asm,
                              ZPRRegOp zprty, ZPRRegOp zprty2>
 : I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty2:$Zm),
@@ -4774,19 +4807,36 @@
   let ElementSize = zprty.ElementSize;
 }

-multiclass sve_int_bin_pred_shift<bits<3> opc, string asm,
-                                  SDPatternOperator op> {
-  def _B : sve_int_bin_pred_shift<0b00, 0b0, opc, asm, ZPR8, ZPR8>;
-  def _H : sve_int_bin_pred_shift<0b01, 0b0, opc, asm, ZPR16, ZPR16>;
-  def _S : sve_int_bin_pred_shift<0b10, 0b0, opc, asm, ZPR32, ZPR32>;
-  def _D : sve_int_bin_pred_shift<0b11, 0b0, opc, asm, ZPR64, ZPR64>;
-
+multiclass sve_int_bin_pred_shift<bits<3> opc, string asm, string Ps,
+                                  SDPatternOperator op, string revname, bit isOrig> {
+  let DestructiveInstType = DestructiveBinaryCommWithRev in {
+  def _B : sve_int_bin_pred_shift<0b00, 0b0, opc, asm, ZPR8, ZPR8>,
+           SVEPseudo2Instr<Ps # _B, 1>, SVEInstr2Rev<NAME # _B, revname # _B, isOrig>;
+  def _H : sve_int_bin_pred_shift<0b01, 0b0, opc, asm, ZPR16, ZPR16>,
+           SVEPseudo2Instr<Ps # _H, 1>, SVEInstr2Rev<NAME # _H, revname # _H, isOrig>;
+  def _S : sve_int_bin_pred_shift<0b10, 0b0, opc, asm, ZPR32, ZPR32>,
+           SVEPseudo2Instr<Ps # _S, 1>, SVEInstr2Rev<NAME # _S, revname # _S, isOrig>;
+  def _D : sve_int_bin_pred_shift<0b11, 0b0, opc, asm, ZPR64, ZPR64>,
+           SVEPseudo2Instr<Ps # _D, 1>, SVEInstr2Rev<NAME # _D, revname # _D, isOrig>;
+  }
   def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
   def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1,  nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
   def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1,  nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
   def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1,  nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }

+multiclass sve_int_bin_pred_zx<SDPatternOperator op> {
+  def _ZERO_B : PredTwoOpPseudo<NAME # _B, ZPR8,  FalseLanesZero>;
+  def _ZERO_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesZero>;
+  def _ZERO_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesZero>;
+  def _ZERO_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesZero>;
+
+  def : SVE_3_Op_Pat_SelZero<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Pseudo>(NAME # _ZERO_B)>;
+  def : SVE_3_Op_Pat_SelZero<nxv8i16, op, nxv8i1,  nxv8i16, nxv8i16, !cast<Pseudo>(NAME # _ZERO_H)>;
+  def : SVE_3_Op_Pat_SelZero<nxv4i32, op, nxv4i1,  nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _ZERO_S)>;
+  def : SVE_3_Op_Pat_SelZero<nxv2i64, op, nxv2i1,  nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _ZERO_D)>;
+}
+
 multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm,
                                        SDPatternOperator op> {
   def _B : sve_int_bin_pred_shift<0b00, 0b1, opc, asm, ZPR8, ZPR64>;
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-merging.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-merging.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-merging.ll
@@ -0,0 +1,340 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; ASR
+;
+
+define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: asr_i8:
+; CHECK: movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a_z,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: asr_i16:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a_z,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: asr_i32:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a_z,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: asr_i64:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a_z,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @asr_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: asr_wide_i8:
+; CHECK-NOT: movprfx
+; CHECK: asr z0.b, p0/m, z0.b, z1.d
+  %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                    <vscale x 16 x i8> %a_z,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @asr_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: asr_wide_i16:
+; CHECK-NOT: movprfx
+; CHECK: asr z0.h, p0/m, z0.h, z1.d
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                    <vscale x 8 x i16> %a_z,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @asr_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: asr_wide_i32:
+; CHECK-NOT: movprfx
+; CHECK: asr z0.s, p0/m, z0.s, z1.d
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                    <vscale x 4 x i32> %a_z,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; ASRD
+;
+
+define <vscale x 16 x i8> @asrd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: asrd_i8:
+; CHECK: movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #1
+; CHECK-NEXT: ret
+  %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                <vscale x 16 x i8> %a_z,
+                                                                i32 1)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @asrd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: asrd_i16:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #2
+; CHECK-NEXT: ret
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x i16> %a_z,
+                                                                i32 2)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @asrd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: asrd_i32:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: ret
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %a_z,
+                                                                i32 31)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @asrd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: asrd_i64:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #64
+; CHECK-NEXT: ret
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %a_z,
+                                                                i32 64)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; LSL
+;
+
+define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: lsl_i8:
+; CHECK: movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a_z,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: lsl_i16:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a_z,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: lsl_i32:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a_z,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsl_i64:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a_z,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @lsl_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsl_wide_i8:
+; CHECK-NOT: movprfx
+; CHECK: lsl z0.b, p0/m, z0.b, z1.d
+  %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                    <vscale x 16 x i8> %a_z,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @lsl_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsl_wide_i16:
+; CHECK-NOT: movprfx
+; CHECK: lsl z0.h, p0/m, z0.h, z1.d
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                    <vscale x 8 x i16> %a_z,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @lsl_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsl_wide_i32:
+; CHECK-NOT: movprfx
+; CHECK: lsl z0.s, p0/m, z0.s, z1.d
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                    <vscale x 4 x i32> %a_z,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+;
+; LSR
+;
+
+define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: lsr_i8:
+; CHECK: movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a_z,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: lsr_i16:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a_z,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: lsr_i32:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a_z,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsr_i64:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a_z,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @lsr_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsr_wide_i8:
+; CHECK-NOT: movprfx
+; CHECK: lsr z0.b, p0/m, z0.b, z1.d
+  %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                    <vscale x 16 x i8> %a_z,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @lsr_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsr_wide_i16:
+; CHECK-NOT: movprfx
+; CHECK: lsr z0.h, p0/m, z0.h, z1.d
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                    <vscale x 8 x i16> %a_z,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @lsr_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsr_wide_i32:
+; CHECK-NOT: movprfx
+; CHECK: lsr z0.s, p0/m, z0.s, z1.d
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                    <vscale x 4 x i32> %a_z,
+                                                                    <vscale x 2 x i64> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)