diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -190,9 +190,9 @@
     return SelectSVELogicalImm(N, VT, Imm);
   }

-  template <uint64_t Low, uint64_t High>
-  bool SelectSVEShiftImm64(SDValue N, SDValue &Imm) {
-    return SelectSVEShiftImm64(N, Low, High, Imm);
+  template <uint64_t Low, uint64_t High, bool AllowSaturation = false>
+  bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
+    return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
   }

   // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
@@ -323,8 +323,8 @@
   bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm);

   bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
-  bool SelectSVEShiftImm64(SDValue N, uint64_t Low, uint64_t High,
-                           SDValue &Imm);
+  bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
+                         bool AllowSaturation, SDValue &Imm);

   bool SelectSVEArithImm(SDValue N, SDValue &Imm);
   bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
@@ -3177,19 +3177,30 @@
   return false;
 }

-// This method is only needed to "cast" i64s into i32s when the value
-// is a valid shift which has been splatted into a vector with i64 elements.
-// Every other type is fine in tablegen.
-bool AArch64DAGToDAGISel::SelectSVEShiftImm64(SDValue N, uint64_t Low,
-                                              uint64_t High, SDValue &Imm) {
+// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
+// Rather than attempt to normalise everything we can sometimes saturate the
+// shift amount during selection. This function also allows for consistent
+// isel patterns by ensuring the resulting "Imm" node is of the i32 type
+// required by the instructions.
+bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
+                                            uint64_t High, bool AllowSaturation,
+                                            SDValue &Imm) {
   if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
     uint64_t ImmVal = CN->getZExtValue();
-    SDLoc DL(N);

-    if (ImmVal >= Low && ImmVal <= High) {
-      Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
-      return true;
+    // Reject shift amounts that are too small.
+    if (ImmVal < Low)
+      return false;
+
+    // Reject or saturate shift amounts that are too big.
+    if (ImmVal > High) {
+      if (!AllowSaturation)
+        return false;
+      ImmVal = High;
     }
+
+    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
+    return true;
   }

   return false;
 }
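A minimal standalone sketch of the accept/saturate/reject rule implemented by SelectSVEShiftImm above (illustrative only, not LLVM code; the bounds used in main() are the ones the SVEShiftImmL8/SVEShiftImmR8 ComplexPatterns below instantiate):

```cpp
#include <cassert>
#include <cstdint>
#include <optional>

// Standalone model of the selection rule. Returns the immediate that would
// be encoded, or nothing when the splatted shift amount cannot use the
// immediate form of the instruction.
std::optional<uint64_t> selectShiftImm(uint64_t ImmVal, uint64_t Low,
                                       uint64_t High, bool AllowSaturation) {
  if (ImmVal < Low)
    return std::nullopt; // too small, e.g. a right shift by 0
  if (ImmVal > High) {
    if (!AllowSaturation)
      return std::nullopt; // left shifts reject over-wide amounts outright
    ImmVal = High;         // right shifts clamp to the element bitwidth
  }
  return ImmVal;
}

int main() {
  // Byte-sized left shift: immediates 0..7 are legal, nothing saturates.
  assert(selectShiftImm(7, 0, 7, false) == 7u);
  assert(!selectShiftImm(8, 0, 7, false).has_value());
  // Byte-sized right shift: immediates 1..8 are legal, larger amounts clamp.
  assert(!selectShiftImm(0, 1, 8, true).has_value()); // stays a vector shift
  assert(selectShiftImm(9, 1, 8, true) == 8u);
  return 0;
}
```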
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1343,10 +1343,10 @@
   defm LSL_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b11, "lsl">;

   // Predicated shifts
-  defm ASR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b0000, "asr", "ASR_ZPZI">;
-  defm LSR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b0001, "lsr", "LSR_ZPZI">;
-  defm LSL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">;
-  defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;
+  defm ASR_ZPmI  : sve_int_bin_pred_shift_imm_right_dup<0b0000, "asr",  "ASR_ZPZI",  int_aarch64_sve_asr>;
+  defm LSR_ZPmI  : sve_int_bin_pred_shift_imm_right_dup<0b0001, "lsr",  "LSR_ZPZI",  int_aarch64_sve_lsr>;
+  defm LSL_ZPmI  : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl",  "LSL_ZPZI",  int_aarch64_sve_lsl>;
+  defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<    0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;

   let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
     defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;
@@ -2385,11 +2385,11 @@
   }

   // SVE2 predicated shifts
-  defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl", "SQSHL_ZPZI">;
-  defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl", "UQSHL_ZPZI">;
-  defm SRSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", "SRSHR_ZPZI", int_aarch64_sve_srshr>;
-  defm URSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", "URSHR_ZPZI", int_aarch64_sve_urshr>;
-  defm SQSHLU_ZPmI : sve2_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>;
+  defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl",  "SQSHL_ZPZI">;
+  defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl",  "UQSHL_ZPZI">;
+  defm SRSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1100, "srshr",  "SRSHR_ZPZI", int_aarch64_sve_srshr>;
+  defm URSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1101, "urshr",  "URSHR_ZPZI", int_aarch64_sve_urshr>;
+  defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>;

   // SVE2 integer add/subtract long
   defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb", int_aarch64_sve_saddlb>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -209,7 +209,14 @@
 def SVEArithUImmPat : ComplexPattern<i32, 1, "SelectSVEArithImm", []>;
 def SVEArithSImmPat : ComplexPattern<i32, 1, "SelectSVESignedArithImm", []>;

-def SVEShiftImm64 : ComplexPattern<i32, 1, "SelectSVEShiftImm64<0, 64>", []>;
+def SVEShiftImmL8  : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 7>", []>;
+def SVEShiftImmL16 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 15>", []>;
+def SVEShiftImmL32 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 31>", []>;
+def SVEShiftImmL64 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 63>", []>;
+def SVEShiftImmR8  : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 8, true>", []>;
+def SVEShiftImmR16 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 16, true>", []>;
+def SVEShiftImmR32 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 32, true>", []>;
+def SVEShiftImmR64 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 64, true>", []>;

 class SVEExactFPImm<string Suffix, string ValA, string ValB> : AsmOperandClass {
   let Name = "SVEExactFPImmOperand" # Suffix;
@@ -315,11 +322,6 @@
   : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))))),
         (inst $Op1, i32:$imm, i32:$shift)>;

-class SVE_1_Op_Imm_Shift_Pred_Pat<ValueType vt, ValueType pt, SDPatternOperator op,
-                                  ZPRRegOp zprty, Operand ImmTy, Instruction inst>
-  : Pat<(vt (op (pt (AArch64ptrue 31)), (vt zprty:$Op1), (vt (AArch64dup (ImmTy:$imm))))),
-        (inst $Op1, ImmTy:$imm)>;
-
 class SVE_1_Op_Imm_Arith_Pred_Pat<ValueType vt, ValueType pt, SDPatternOperator op,
                                   ZPRRegOp zprty, ValueType it,
                                   ComplexPattern cpx, Instruction inst>
   : Pat<(vt (op (pt (AArch64ptrue 31)), (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm)))))),
         (inst $Op1, i32:$imm)>;
@@ -409,6 +411,18 @@
   : Pat<(vt (op pt:$Pg, vt:$Src, inreg_vt, vt:$PassThru)),
         (inst $PassThru, $Pg, $Src)>;

+class SVE_Shift_DupImm_Pred_Pat<ValueType vt, SDPatternOperator op,
+                                ValueType pt, ValueType it,
+                                ComplexPattern cast, Instruction inst>
+: Pat<(vt (op pt:$Pg, vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))),
+      (inst $Pg, $Rn, i32:$imm)>;
+
+class SVE_Shift_DupImm_All_Active_Pat<ValueType vt, SDPatternOperator op,
+                                      ValueType pt, ValueType it,
+                                      ComplexPattern cast, Instruction inst>
+: Pat<(vt (op (pt (AArch64ptrue 31)), vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))),
+      (inst $Rn, i32:$imm)>;
+
 //
 // Pseudo -> Instruction mappings
 //
@@ -4761,38 +4775,19 @@
   let ElementSize = zprty.ElementSize;
 }

-multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string psName=""> {
-  def _B : SVEPseudo2Instr<psName # _B, 1>,
+multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string Ps,
+                                           SDPatternOperator op = null_frag> {
+  def _B : SVEPseudo2Instr<Ps # _B, 1>,
            sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
-  def _H : SVEPseudo2Instr<psName # _H, 1>,
-           sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
-    let Inst{8} = imm{3};
-  }
-  def _S : SVEPseudo2Instr<psName # _S, 1>,
-           sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
-    let Inst{9-8} = imm{4-3};
-  }
-  def _D : SVEPseudo2Instr<psName # _D, 1>,
-           sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
-    let Inst{22} = imm{5};
-    let Inst{9-8} = imm{4-3};
-  }
-}
-
-multiclass sve2_int_bin_pred_shift_imm_left<bits<4> opc, string asm,
-                                            string psName,
-                                            SDPatternOperator op> {
-
-  def _B : SVEPseudo2Instr<psName # _B, 1>, sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
-  def _H : SVEPseudo2Instr<psName # _H, 1>,
+  def _H : SVEPseudo2Instr<Ps # _H, 1>,
            sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
     let Inst{8} = imm{3};
   }
-  def _S : SVEPseudo2Instr<psName # _S, 1>,
+  def _S : SVEPseudo2Instr<Ps # _S, 1>,
            sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
     let Inst{9-8} = imm{4-3};
   }
-  def _D : SVEPseudo2Instr<psName # _D, 1>,
+  def _D : SVEPseudo2Instr<Ps # _D, 1>,
           sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
     let Inst{22} = imm{5};
     let Inst{9-8} = imm{4-3};
@@ -4804,6 +4799,16 @@
   def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>;
 }

+// As above but shift amount takes the form of a "vector immediate".
+multiclass sve_int_bin_pred_shift_imm_left_dup<bits<4> opc, string asm,
+                                               string Ps, SDPatternOperator op>
+: sve_int_bin_pred_shift_imm_left<opc, asm, Ps, op> {
+  def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmL8,  !cast<Instruction>(NAME # _B)>;
+  def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1,  i32, SVEShiftImmL16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1,  i32, SVEShiftImmL32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1,  i64, SVEShiftImmL64, !cast<Instruction>(NAME # _D)>;
+}
+
 multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd<SDPatternOperator op> {
   def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8,  tvecshiftL8,  FalseLanesZero>;
   def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesZero>;
@@ -4840,6 +4845,16 @@
   def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
 }

+// As above but shift amount takes the form of a "vector immediate".
+multiclass sve_int_bin_pred_shift_imm_right_dup<bits<4> opc, string asm,
+                                                string Ps, SDPatternOperator op>
+: sve_int_bin_pred_shift_imm_right<opc, asm, Ps, op> {
+  def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmR8,  !cast<Instruction>(NAME # _B)>;
+  def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1,  i32, SVEShiftImmR16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1,  i32, SVEShiftImmR32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1,  i64, SVEShiftImmR64, !cast<Instruction>(NAME # _D)>;
+}
+
 multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd<SDPatternOperator op = null_frag> {
   def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8,  vecshiftR8,  FalseLanesZero>;
   def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, vecshiftR16, FalseLanesZero>;
@@ -4980,10 +4995,10 @@
     let Inst{20-19} = imm{4-3};
   }

-  def : SVE_1_Op_Imm_Shift_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8,  vecshiftL8,  !cast<Instruction>(NAME # _B)>;
-  def : SVE_1_Op_Imm_Shift_Pred_Pat<nxv8i16, nxv8i1,  op, ZPR16, vecshiftL16, !cast<Instruction>(NAME # _H)>;
-  def : SVE_1_Op_Imm_Shift_Pred_Pat<nxv4i32, nxv4i1,  op, ZPR32, vecshiftL32, !cast<Instruction>(NAME # _S)>;
-  def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1,  op, ZPR64, i64, SVEShiftImm64, !cast<Instruction>(NAME # _D)>;
+  def : SVE_Shift_DupImm_All_Active_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmL8,  !cast<Instruction>(NAME # _B)>;
+  def : SVE_Shift_DupImm_All_Active_Pat<nxv8i16, op, nxv8i1,  i32, SVEShiftImmL16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_Shift_DupImm_All_Active_Pat<nxv4i32, op, nxv4i1,  i32, SVEShiftImmL32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_Shift_DupImm_All_Active_Pat<nxv2i64, op, nxv2i1,  i64, SVEShiftImmL64, !cast<Instruction>(NAME # _D)>;
 }

 multiclass sve_int_bin_cons_shift_imm_right<bits<4> opc, string asm,
@@ -5000,10 +5015,10 @@
     let Inst{20-19} = imm{4-3};
   }

-  def : SVE_1_Op_Imm_Shift_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8,  vecshiftR8,  !cast<Instruction>(NAME # _B)>;
-  def : SVE_1_Op_Imm_Shift_Pred_Pat<nxv8i16, nxv8i1,  op, ZPR16, vecshiftR16, !cast<Instruction>(NAME # _H)>;
-  def : SVE_1_Op_Imm_Shift_Pred_Pat<nxv4i32, nxv4i1,  op, ZPR32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
-  def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1,  op, ZPR64, i64, SVEShiftImm64, !cast<Instruction>(NAME # _D)>;
+  def : SVE_Shift_DupImm_All_Active_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmR8,  !cast<Instruction>(NAME # _B)>;
+  def : SVE_Shift_DupImm_All_Active_Pat<nxv8i16, op, nxv8i1,  i32, SVEShiftImmR16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_Shift_DupImm_All_Active_Pat<nxv4i32, op, nxv4i1,  i32, SVEShiftImmR32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_Shift_DupImm_All_Active_Pat<nxv2i64, op, nxv2i1,  i64, SVEShiftImmR64, !cast<Instruction>(NAME # _D)>;
 }

 //===----------------------------------------------------------------------===//
 // SVE Memory - Store Group
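The eight ComplexPatterns encode the legal immediate ranges, which differ between left and right shifts. A compile-time sketch of the mapping (the struct and names here are illustrative, mirroring the template arguments the defs above pass to SelectSVEShiftImm; this is not LLVM code):

```cpp
#include <cstdint>

// <Low, High, AllowSaturation> per element size, as instantiated above.
struct ShiftImmBounds {
  uint64_t Low, High;
  bool AllowSaturation;
};

constexpr ShiftImmBounds L8 {0, 7,  false}; // lsl .b: #0..#7
constexpr ShiftImmBounds L16{0, 15, false}; // lsl .h: #0..#15
constexpr ShiftImmBounds L32{0, 31, false}; // lsl .s: #0..#31
constexpr ShiftImmBounds L64{0, 63, false}; // lsl .d: #0..#63
constexpr ShiftImmBounds R8 {1, 8,  true};  // asr/lsr .b: #1..#8
constexpr ShiftImmBounds R16{1, 16, true};  // asr/lsr .h: #1..#16
constexpr ShiftImmBounds R32{1, 32, true};  // asr/lsr .s: #1..#32
constexpr ShiftImmBounds R64{1, 64, true};  // asr/lsr .d: #1..#64

// Left shifts never saturate: an over-wide lsl has no equivalent immediate
// form, so those calls keep the vector-operand instruction instead.
static_assert(!L8.AllowSaturation && R8.AllowSaturation,
              "only right shifts clamp over-wide amounts");
```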
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
 ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

@@ -8,8 +9,9 @@

 define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: smax_i8:
-; CHECK: smax z0.b, z0.b, #-128
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smax z0.b, z0.b, #-128
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %elt = insertelement <vscale x 16 x i8> undef, i8 -128, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
@@ -21,8 +23,9 @@

 define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: smax_i16:
-; CHECK: smax z0.h, z0.h, #127
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smax z0.h, z0.h, #127
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -34,8 +37,9 @@

 define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: smax_i32:
-; CHECK: smax z0.s, z0.s, #-128
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smax z0.s, z0.s, #-128
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 -128, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -47,8 +51,9 @@

 define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: smax_i64:
-; CHECK: smax z0.d, z0.d, #127
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smax z0.d, z0.d, #127
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
@@ -62,8 +67,9 @@

 define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: smin_i8:
-; CHECK: smin z0.b, z0.b, #127
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smin z0.b, z0.b, #127
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
@@ -75,8 +81,9 @@

 define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: smin_i16:
-; CHECK: smin z0.h, z0.h, #-128
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smin z0.h, z0.h, #-128
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 -128, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -88,8 +95,9 @@

 define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: smin_i32:
-; CHECK: smin z0.s, z0.s, #127
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smin z0.s, z0.s, #127
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -101,8 +109,9 @@

 define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: smin_i64:
-; CHECK: smin z0.d, z0.d, #-128
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smin z0.d, z0.d, #-128
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 -128, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
@@ -116,8 +125,9 @@

 define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: umax_i8:
-; CHECK: umax z0.b, z0.b, #0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umax z0.b, z0.b, #0
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %elt = insertelement <vscale x 16 x i8> undef, i8 0, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
@@ -129,8 +139,9 @@

 define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: umax_i16:
-; CHECK: umax z0.h, z0.h, #255
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umax z0.h, z0.h, #255
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 255, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -142,8 +153,9 @@

 define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: umax_i32:
-; CHECK: umax z0.s, z0.s, #0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umax z0.s, z0.s, #0
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 0, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -155,8 +167,9 @@

 define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: umax_i64:
-; CHECK: umax z0.d, z0.d, #255
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umax z0.d, z0.d, #255
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 255, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
@@ -170,8 +183,9 @@

 define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: umin_i8:
-; CHECK: umin z0.b, z0.b, #255
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umin z0.b, z0.b, #255
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %elt = insertelement <vscale x 16 x i8> undef, i8 255, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
@@ -183,8 +197,9 @@

 define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: umin_i16:
-; CHECK: umin z0.h, z0.h, #0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umin z0.h, z0.h, #0
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 0, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -196,8 +211,9 @@

 define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: umin_i32:
-; CHECK: umin z0.s, z0.s, #255
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umin z0.s, z0.s, #255
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 255, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -209,8 +225,9 @@

 define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: umin_i64:
-; CHECK: umin z0.d, z0.d, #0
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umin z0.d, z0.d, #0
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 0, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
@@ -224,8 +241,9 @@

 define <vscale x 16 x i8> @sqadd_b_lowimm(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: sqadd_b_lowimm:
-; CHECK: sqadd z0.b, z0.b, #27
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.b, z0.b, #27 // =0x1b
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8> %a,
@@ -235,8 +253,9 @@

 define <vscale x 8 x i16> @sqadd_h_lowimm(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqadd_h_lowimm:
-; CHECK: sqadd z0.h, z0.h, #43
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.h, z0.h, #43 // =0x2b
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %a,
@@ -246,8 +265,9 @@

 define <vscale x 8 x i16> @sqadd_h_highimm(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqadd_h_highimm:
-; CHECK: sqadd z0.h, z0.h, #2048
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.h, z0.h, #2048 // =0x800
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %a,
@@ -257,8 +277,9 @@

 define <vscale x 4 x i32> @sqadd_s_lowimm(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqadd_s_lowimm:
-; CHECK: sqadd z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.s, z0.s, #1 // =0x1
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %a,
@@ -268,8 +289,9 @@

 define <vscale x 4 x i32> @sqadd_s_highimm(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqadd_s_highimm:
-; CHECK: sqadd z0.s, z0.s, #8192
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.s, z0.s, #8192 // =0x2000
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %a,
@@ -279,8 +301,9 @@

 define <vscale x 2 x i64> @sqadd_d_lowimm(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqadd_d_lowimm:
-; CHECK: sqadd z0.d, z0.d, #255
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.d, z0.d, #255 // =0xff
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %a,
@@ -290,8 +313,9 @@

 define <vscale x 2 x i64> @sqadd_d_highimm(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqadd_d_highimm:
-; CHECK: sqadd z0.d, z0.d, #65280
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd z0.d, z0.d, #65280 // =0xff00
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %a,
@@ -303,8 +327,9 @@

 define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: sqsub_b_lowimm:
-; CHECK: sqsub z0.b, z0.b, #27
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.b, z0.b, #27 // =0x1b
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.x.nxv16i8(<vscale x 16 x i8> %a,
@@ -314,8 +339,9 @@

 define <vscale x 8 x i16> @sqsub_h_lowimm(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqsub_h_lowimm:
-; CHECK: sqsub z0.h, z0.h, #43
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.h, z0.h, #43 // =0x2b
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> %a,
@@ -325,8 +351,9 @@

 define <vscale x 8 x i16> @sqsub_h_highimm(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqsub_h_highimm:
-; CHECK: sqsub z0.h, z0.h, #2048
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.h, z0.h, #2048 // =0x800
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> %a,
@@ -336,8 +363,9 @@

 define <vscale x 4 x i32> @sqsub_s_lowimm(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqsub_s_lowimm:
-; CHECK: sqsub z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.s, z0.s, #1 // =0x1
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> %a,
@@ -347,8 +375,9 @@

 define <vscale x 4 x i32> @sqsub_s_highimm(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqsub_s_highimm:
-; CHECK: sqsub z0.s, z0.s, #8192
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.s, z0.s, #8192 // =0x2000
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> %a,
@@ -358,8 +387,9 @@

 define <vscale x 2 x i64> @sqsub_d_lowimm(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqsub_d_lowimm:
-; CHECK: sqsub z0.d, z0.d, #255
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.d, z0.d, #255 // =0xff
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> %a,
@@ -369,8 +399,9 @@

 define <vscale x 2 x i64> @sqsub_d_highimm(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqsub_d_highimm:
-; CHECK: sqsub z0.d, z0.d, #65280
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub z0.d, z0.d, #65280 // =0xff00
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> %a,
@@ -382,8 +413,9 @@

 define <vscale x 16 x i8> @uqadd_b_lowimm(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: uqadd_b_lowimm:
-; CHECK: uqadd z0.b, z0.b, #27
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqadd z0.b, z0.b, #27 // =0x1b
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.x.nxv16i8(<vscale x 16 x i8> %a,
@@ -393,8 +425,9 @@

 define <vscale x 8 x i16> @uqadd_h_lowimm(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: uqadd_h_lowimm:
-; CHECK: uqadd z0.h, z0.h, #43
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqadd z0.h, z0.h, #43 // =0x2b
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.x.nxv8i16(<vscale x 8 x i16> %a,
@@ -404,8 +437,9 @@

 define <vscale x 8 x i16> @uqadd_h_highimm(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: uqadd_h_highimm:
-; CHECK: uqadd z0.h, z0.h, #2048
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqadd z0.h, z0.h, #2048 // =0x800
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.x.nxv8i16(<vscale x 8 x i16> %a,
@@ -415,8 +449,9 @@

 define <vscale x 4 x i32> @uqadd_s_lowimm(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: uqadd_s_lowimm:
-; CHECK: uqadd z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqadd z0.s, z0.s, #1 // =0x1
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.x.nxv4i32(<vscale x 4 x i32> %a,
@@ -428,8 +463,9 @@

 define <vscale x 16 x i8> @uqsub_b_lowimm(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: uqsub_b_lowimm:
-; CHECK: uqsub z0.b, z0.b, #27
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub z0.b, z0.b, #27 // =0x1b
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.x.nxv16i8(<vscale x 16 x i8> %a,
@@ -439,8 +475,9 @@

 define <vscale x 8 x i16> @uqsub_h_lowimm(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: uqsub_h_lowimm:
-; CHECK: uqsub z0.h, z0.h, #43
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub z0.h, z0.h, #43 // =0x2b
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16> %a,
@@ -450,8 +487,9 @@

 define <vscale x 8 x i16> @uqsub_h_highimm(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: uqsub_h_highimm:
-; CHECK: uqsub z0.h, z0.h, #2048
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub z0.h, z0.h, #2048 // =0x800
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16> %a,
@@ -461,8 +499,9 @@

 define <vscale x 4 x i32> @uqsub_s_lowimm(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: uqsub_s_lowimm:
-; CHECK: uqsub z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub z0.s, z0.s, #1 // =0x1
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32> %a,
@@ -472,8 +511,9 @@

 define <vscale x 4 x i32> @uqsub_s_highimm(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: uqsub_s_highimm:
-; CHECK: uqsub z0.s, z0.s, #8192
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub z0.s, z0.s, #8192 // =0x2000
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32> %a,
@@ -483,8 +523,9 @@

 define <vscale x 2 x i64> @uqsub_d_lowimm(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: uqsub_d_lowimm:
-; CHECK: uqsub z0.d, z0.d, #255
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub z0.d, z0.d, #255 // =0xff
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64> %a,
@@ -494,8 +535,9 @@

 define <vscale x 2 x i64> @uqsub_d_highimm(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: uqsub_d_highimm:
-; CHECK: uqsub z0.d, z0.d, #65280
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub z0.d, z0.d, #65280 // =0xff00
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64> %a,
@@ -506,8 +548,9 @@

 define <vscale x 4 x i32> @uqadd_s_highimm(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: uqadd_s_highimm:
-; CHECK: uqadd z0.s, z0.s, #8192
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqadd z0.s, z0.s, #8192 // =0x2000
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.x.nxv4i32(<vscale x 4 x i32> %a,
@@ -517,8 +560,9 @@

 define <vscale x 2 x i64> @uqadd_d_lowimm(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: uqadd_d_lowimm:
-; CHECK: uqadd z0.d, z0.d, #255
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqadd z0.d, z0.d, #255 // =0xff
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.x.nxv2i64(<vscale x 2 x i64> %a,
@@ -528,8 +572,9 @@

 define <vscale x 2 x i64> @uqadd_d_highimm(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: uqadd_d_highimm:
-; CHECK: uqadd z0.d, z0.d, #65280
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqadd z0.d, z0.d, #65280 // =0xff00
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.x.nxv2i64(<vscale x 2 x i64> %a,
@@ -539,10 +584,24 @@

 ; ASR

-define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i8> %a) {
+define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: asr_i8:
-; CHECK: asr z0.b, z0.b, #8
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr z0.b, p0/m, z0.b, #8
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 9, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 16 x i8> @asr_i8_all_active(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: asr_i8_all_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr z0.b, z0.b, #8
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %elt = insertelement <vscale x 16 x i8> undef, i8 8, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
@@ -552,10 +611,37 @@
   ret <vscale x 16 x i8> %out
 }

-define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i16> %a) {
+; Ensure we don't match a right shift by zero to the immediate form.
+define <vscale x 16 x i8> @asr_i8_too_small(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: asr_i8_too_small:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.b, #0 // =0x0
+; CHECK-NEXT:    asr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i8> zeroinitializer)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: asr_i16:
-; CHECK: asr z0.h, z0.h, #16
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr z0.h, p0/m, z0.h, #16
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 17, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 8 x i16> @asr_i16_all_active(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: asr_i16_all_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr z0.h, z0.h, #16
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 16, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -565,10 +651,37 @@
   ret <vscale x 8 x i16> %out
 }

-define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i32> %a) {
+; Ensure we don't match a right shift by zero to the immediate form.
+define <vscale x 8 x i16> @asr_i16_too_small(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: asr_i16_too_small:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.h, #0 // =0x0
+; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i16> zeroinitializer)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: asr_i32:
-; CHECK: asr z0.s, z0.s, #32
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr z0.s, p0/m, z0.s, #32
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 4 x i32> undef, i32 33, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @asr_i32_all_active(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: asr_i32_all_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr z0.s, z0.s, #32
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 32, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -578,10 +691,37 @@
   ret <vscale x 4 x i32> %out
 }

-define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i64> %a) {
+; Ensure we don't match a right shift by zero to the immediate form.
+define <vscale x 4 x i32> @asr_i32_too_small(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: asr_i32_too_small:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #0 // =0x0
+; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i32> zeroinitializer)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: asr_i64:
-; CHECK: asr z0.d, z0.d, #64
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr z0.d, p0/m, z0.d, #64
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 65, i64 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @asr_i64_all_active(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: asr_i64_all_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    asr z0.d, z0.d, #64
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 64, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
@@ -591,12 +731,39 @@
   ret <vscale x 2 x i64> %out
 }

+; Ensure we don't match a right shift by zero to the immediate form.
+define <vscale x 2 x i64> @asr_i64_too_small(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: asr_i64_too_small:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, #0 // =0x0
+; CHECK-NEXT:    asr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> zeroinitializer)
+  ret <vscale x 2 x i64> %out
+}
+
 ; LSL

-define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i8> %a) {
+define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: lsl_i8:
-; CHECK: lsl z0.b, z0.b, #7
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 7, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 16 x i8> @lsl_i8_all_active(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: lsl_i8_all_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.b, z0.b, #7
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %elt = insertelement <vscale x 16 x i8> undef, i8 7, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
@@ -606,10 +773,50 @@
   ret <vscale x 16 x i8> %out
 }

-define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i16> %a) {
+; Ensure we don't match a left shift bigger than its bitwidth to the immediate form.
+define <vscale x 16 x i8> @lsl_i8_too_big(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: lsl_i8_too_big:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.b, #8 // =0x8
+; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 8, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 16 x i8> @lsl_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: lsl_i8_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, #0
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i8> zeroinitializer)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: lsl_i16:
-; CHECK: lsl z0.h, z0.h, #15
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 15, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 8 x i16> @lsl_i16_all_active(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: lsl_i16_all_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.h, z0.h, #15
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 15, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -619,10 +826,50 @@
   ret <vscale x 8 x i16> %out
 }

-define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i32> %a) {
+; Ensure we don't match a left shift bigger than its bitwidth to the immediate form.
+define <vscale x 8 x i16> @lsl_i16_too_big(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: lsl_i16_too_big:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.h, #16 // =0x10
+; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 16, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 8 x i16> @lsl_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: lsl_i16_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, #0
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i16> zeroinitializer)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: lsl_i32:
-; CHECK: lsl z0.s, z0.s, #31
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 4 x i32> undef, i32 31, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @lsl_i32_all_active(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: lsl_i32_all_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.s, z0.s, #31
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 31, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -632,10 +879,50 @@
   ret <vscale x 4 x i32> %out
 }

-define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i64> %a) {
+; Ensure we don't match a left shift bigger than its bitwidth to the immediate form.
+define <vscale x 4 x i32> @lsl_i32_too_big(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: lsl_i32_too_big:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #32 // =0x20
+; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 4 x i32> undef, i32 32, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @lsl_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: lsl_i32_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, #0
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i32> zeroinitializer)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: lsl_i64:
-; CHECK: lsl z0.d, z0.d, #63
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 63, i64 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @lsl_i64_all_active(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: lsl_i64_all_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.d, z0.d, #63
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 63, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
@@ -645,12 +932,52 @@
   ret <vscale x 2 x i64> %out
 }

+; Ensure we don't match a left shift bigger than its bitwidth to the immediate form.
+define <vscale x 2 x i64> @lsl_i64_too_big(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: lsl_i64_too_big:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, #64 // =0x40
+; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 64, i64 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @lsl_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: lsl_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, #0
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> zeroinitializer)
+  ret <vscale x 2 x i64> %out
+}
+
 ; LSR

-define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i8> %a) {
+define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: lsr_i8:
-; CHECK: lsr z0.b, z0.b, #8
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, #8
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 9, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 16 x i8> @lsr_i8_all_active(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: lsr_i8_all_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.b, z0.b, #8
+; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %elt = insertelement <vscale x 16 x i8> undef, i8 8, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
@@ -660,10 +987,37 @@
   ret <vscale x 16 x i8> %out
 }

-define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i16> %a) {
+; Ensure we don't match a right shift by zero to the immediate form.
+define <vscale x 16 x i8> @lsr_i8_too_small(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: lsr_i8_too_small:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.b, #0 // =0x0
+; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i8> zeroinitializer)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: lsr_i16:
-; CHECK: lsr z0.h, z0.h, #16
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, #16
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 17, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 8 x i16> @lsr_i16_all_active(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: lsr_i16_all_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.h, z0.h, #16
+; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 16, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -673,10 +1027,37 @@
   ret <vscale x 8 x i16> %out
 }

-define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i32> %a) {
+; Ensure we don't match a right shift by zero to the immediate form.
+define <vscale x 8 x i16> @lsr_i16_too_small(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: lsr_i16_too_small:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.h, #0 // =0x0
+; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i16> zeroinitializer)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: lsr_i32:
-; CHECK: lsr z0.s, z0.s, #32
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, #32
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 4 x i32> undef, i32 33, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @lsr_i32_all_active(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: lsr_i32_all_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.s, z0.s, #32
+; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 32, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -686,10 +1067,37 @@
   ret <vscale x 4 x i32> %out
 }

-define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i64> %a) {
+; Ensure we don't match a right shift by zero to the immediate form.
+define <vscale x 4 x i32> @lsr_i32_too_small(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: lsr_i32_too_small:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #0 // =0x0
+; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i32> zeroinitializer)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
 ; CHECK-LABEL: lsr_i64:
-; CHECK: lsr z0.d, z0.d, #64
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, #64
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 65, i64 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @lsr_i64_all_active(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: lsr_i64_all_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsr z0.d, z0.d, #64
+; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 64, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
@@ -699,6 +1107,19 @@
   ret <vscale x 2 x i64> %out
 }

+; Ensure we don't match a right shift by zero to the immediate form.
+define <vscale x 2 x i64> @lsr_i64_too_small(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: lsr_i64_too_small:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, #0 // =0x0
+; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> zeroinitializer)
+  ret <vscale x 2 x i64> %out
+}
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
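For reference, a user-level view of the behaviour the new patterns and tests pin down. This assumes the usual clang lowering of the ACLE `_n` intrinsic forms to the splat-operand `llvm.aarch64.sve.*` intrinsics exercised above; the function names are illustrative and an SVE-enabled toolchain (e.g. `-march=armv8-a+sve`) is required:

```cpp
#include <arm_sve.h>

// With the patterns above, the splatted amounts below should select the
// immediate instruction forms, e.g. "asr z0.b, p0/m, z0.b, #8" (the amount
// 9 clamps to 8) and "lsl z0.h, p0/m, z0.h, #15".
svint8_t asr_saturates(svbool_t pg, svint8_t x) {
  return svasr_n_s8_m(pg, x, 9); // amount > 8 saturates to #8 for .b
}

svint16_t lsl_in_range(svbool_t pg, svint16_t x) {
  return svlsl_n_s16_m(pg, x, 15); // 0..15 are legal lsl immediates for .h
}
```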