diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -72,7 +72,7 @@
   ADC,
   SBC, // adc, sbc instructions
 
-  // Arithmetic instructions
+  // Predicated instructions where inactive lanes produce undefined results.
   ADD_PRED,
   FADD_PRED,
   FDIV_PRED,
@@ -80,14 +80,14 @@
   FMUL_PRED,
   FSUB_PRED,
   SDIV_PRED,
+  SHL_PRED,
+  SMAX_PRED,
+  SMIN_PRED,
+  SRA_PRED,
+  SRL_PRED,
   UDIV_PRED,
-  SMIN_MERGE_OP1,
-  UMIN_MERGE_OP1,
-  SMAX_MERGE_OP1,
-  UMAX_MERGE_OP1,
-  SHL_MERGE_OP1,
-  SRL_MERGE_OP1,
-  SRA_MERGE_OP1,
+  UMAX_PRED,
+  UMIN_PRED,
 
   SETCC_MERGE_ZERO,
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1397,14 +1397,14 @@
     MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
     MAKE_CASE(AArch64ISD::ADD_PRED)
     MAKE_CASE(AArch64ISD::SDIV_PRED)
+    MAKE_CASE(AArch64ISD::SHL_PRED)
+    MAKE_CASE(AArch64ISD::SMAX_PRED)
+    MAKE_CASE(AArch64ISD::SMIN_PRED)
+    MAKE_CASE(AArch64ISD::SRA_PRED)
+    MAKE_CASE(AArch64ISD::SRL_PRED)
     MAKE_CASE(AArch64ISD::UDIV_PRED)
-    MAKE_CASE(AArch64ISD::SMIN_MERGE_OP1)
-    MAKE_CASE(AArch64ISD::UMIN_MERGE_OP1)
-    MAKE_CASE(AArch64ISD::SMAX_MERGE_OP1)
-    MAKE_CASE(AArch64ISD::UMAX_MERGE_OP1)
-    MAKE_CASE(AArch64ISD::SHL_MERGE_OP1)
-    MAKE_CASE(AArch64ISD::SRL_MERGE_OP1)
-    MAKE_CASE(AArch64ISD::SRA_MERGE_OP1)
+    MAKE_CASE(AArch64ISD::UMAX_PRED)
+    MAKE_CASE(AArch64ISD::UMIN_PRED)
    MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
     MAKE_CASE(AArch64ISD::ADC)
     MAKE_CASE(AArch64ISD::SBC)
@@ -3540,13 +3540,13 @@
   case ISD::UDIV:
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_PRED);
   case ISD::SMIN:
-    return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_MERGE_OP1);
+    return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED);
   case ISD::UMIN:
-    return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_MERGE_OP1);
+    return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED);
   case ISD::SMAX:
-    return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_MERGE_OP1);
+    return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED);
   case ISD::UMAX:
-    return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_MERGE_OP1);
+    return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED);
   case ISD::SRA:
   case ISD::SRL:
   case ISD::SHL:
@@ -8914,7 +8914,7 @@
   case ISD::SHL:
     if (VT.isScalableVector())
-      return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_MERGE_OP1);
+      return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
 
     if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
       return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
@@ -8926,8 +8926,8 @@
   case ISD::SRA:
   case ISD::SRL:
     if (VT.isScalableVector()) {
-      unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_MERGE_OP1
-                                                : AArch64ISD::SRL_MERGE_OP1;
+      unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
+                                                : AArch64ISD::SRL_PRED;
       return LowerToPredicatedOp(Op, DAG, Opc);
     }
 
@@ -11940,6 +11940,25 @@
                      Zero);
 }
 
+// If a merged operation has no inactive lanes we can relax it to a predicated
+// or unpredicated operation, which potentially allows better isel (perhaps
+// using immediate forms) or relaxing register reuse requirements.
+static SDValue convertMergedOpToPredOp(SDNode *N, unsigned PredOpc,
+                                       SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
+  assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
+  SDValue Pg = N->getOperand(1);
+
+  // ISD way to specify an all active predicate.
+  if ((Pg.getOpcode() == AArch64ISD::PTRUE) &&
+      (Pg.getConstantOperandVal(0) == AArch64SVEPredPattern::all))
+    return DAG.getNode(PredOpc, SDLoc(N), N->getValueType(0), Pg,
+                       N->getOperand(2), N->getOperand(3));
+
+  // FUTURE: SplatVector(true)
+  return SDValue();
+}
+
 static SDValue performIntrinsicCombine(SDNode *N,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const AArch64Subtarget *Subtarget) {
@@ -12018,26 +12037,19 @@
   case Intrinsic::aarch64_sve_ext:
     return LowerSVEIntrinsicEXT(N, DAG);
   case Intrinsic::aarch64_sve_smin:
-    return DAG.getNode(AArch64ISD::SMIN_MERGE_OP1, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+    return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
   case Intrinsic::aarch64_sve_umin:
-    return DAG.getNode(AArch64ISD::UMIN_MERGE_OP1, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+    return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
   case Intrinsic::aarch64_sve_smax:
-    return DAG.getNode(AArch64ISD::SMAX_MERGE_OP1, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+    return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
   case Intrinsic::aarch64_sve_umax:
-    return DAG.getNode(AArch64ISD::UMAX_MERGE_OP1, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+    return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
   case Intrinsic::aarch64_sve_lsl:
-    return DAG.getNode(AArch64ISD::SHL_MERGE_OP1, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+    return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
   case Intrinsic::aarch64_sve_lsr:
-    return DAG.getNode(AArch64ISD::SRL_MERGE_OP1, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+    return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
   case Intrinsic::aarch64_sve_asr:
-    return DAG.getNode(AArch64ISD::SRA_MERGE_OP1, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+    return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
   case Intrinsic::aarch64_sve_cmphs:
     if (!N->getOperand(2).getValueType().isFloatingPoint())
       return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
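Note (illustration only, not part of the patch): convertMergedOpToPredOp fires when the governing predicate is known to be all active, at which point the intrinsic no longer has to merge into (and so tie its result register to) operand 1. A minimal sketch of the payoff, assuming SVE codegen with this patch applied; register and instruction choices are illustrative:

;   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)   ; 31 == "all" pattern
;   %r  = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg,
;               <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer)
;
; Previously this had to stay a merging operation, keeping the result tied to
; %a's register and the zero splat materialised in a register. As SMAX_PRED the
; inactive lanes are undefined, so isel is free to pick something like the
; unpredicated immediate form:   smax z0.s, z0.s, #0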
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -174,22 +174,20 @@
 
 // Predicated operations with the result of inactive lanes being unspecified.
 def AArch64add_p  : SDNode<"AArch64ISD::ADD_PRED",  SDT_AArch64Arith>;
+def AArch64asr_p  : SDNode<"AArch64ISD::SRA_PRED",  SDT_AArch64Arith>;
 def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>;
 def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>;
 def AArch64fma_p  : SDNode<"AArch64ISD::FMA_PRED",  SDT_AArch64FMA>;
 def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>;
 def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>;
+def AArch64lsl_p  : SDNode<"AArch64ISD::SHL_PRED",  SDT_AArch64Arith>;
+def AArch64lsr_p  : SDNode<"AArch64ISD::SRL_PRED",  SDT_AArch64Arith>;
 def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
+def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>;
+def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>;
 def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
-
-// Merging op1 into the inactive lanes.
-def AArch64smin_m1 : SDNode<"AArch64ISD::SMIN_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64umin_m1 : SDNode<"AArch64ISD::UMIN_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64smax_m1 : SDNode<"AArch64ISD::SMAX_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64umax_m1 : SDNode<"AArch64ISD::UMAX_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64lsl_m1  : SDNode<"AArch64ISD::SHL_MERGE_OP1",  SDT_AArch64Arith>;
-def AArch64lsr_m1  : SDNode<"AArch64ISD::SRL_MERGE_OP1",  SDT_AArch64Arith>;
-def AArch64asr_m1  : SDNode<"AArch64ISD::SRA_MERGE_OP1",  SDT_AArch64Arith>;
+def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
+def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>;
 
 def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
 def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;
@@ -287,10 +285,10 @@
   defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor>;
   defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and>;
 
-  defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_m1>;
-  defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_m1>;
-  defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_m1>;
-  defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_m1>;
+  defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_p>;
+  defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_p>;
+  defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_p>;
+  defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_p>;
 
   defm MUL_ZI   : sve_int_arith_imm2<"mul", mul>;
   defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", int_aarch64_sve_mul>;
@@ -343,12 +341,17 @@
   defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", int_aarch64_sve_fabs>;
   defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", int_aarch64_sve_fneg>;
 
-  defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", AArch64smax_m1>;
-  defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", AArch64umax_m1>;
-  defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", AArch64smin_m1>;
-  defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", AArch64umin_m1>;
-  defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", int_aarch64_sve_sabd>;
-  defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", int_aarch64_sve_uabd>;
+  defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", int_aarch64_sve_smax, DestructiveBinaryComm>;
+  defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", int_aarch64_sve_umax, DestructiveBinaryComm>;
+  defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", "SMIN_ZPZZ", int_aarch64_sve_smin, DestructiveBinaryComm>;
+  defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", "UMIN_ZPZZ", int_aarch64_sve_umin, DestructiveBinaryComm>;
+  defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", "SABD_ZPZZ", int_aarch64_sve_sabd, DestructiveBinaryComm>;
+  defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", "UABD_ZPZZ", int_aarch64_sve_uabd, DestructiveBinaryComm>;
+
+  defm SMAX_ZPZZ : sve_int_bin_pred_bhsd<AArch64smax_p>;
+  defm UMAX_ZPZZ : sve_int_bin_pred_bhsd<AArch64umax_p>;
+  defm SMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64smin_p>;
+  defm UMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64umin_p>;
 
   defm FRECPE_ZZ  : sve_fp_2op_u_zd<0b110, "frecpe",  int_aarch64_sve_frecpe_x>;
   defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", int_aarch64_sve_frsqrte_x>;
@@ -1313,9 +1316,9 @@
   defm INDEX_II : sve_int_index_ii<"index", index_vector>;
 
   // Unpredicated shifts
-  defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_m1>;
-  defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_m1>;
-  defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_m1>;
+  defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_p>;
+  defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_p>;
+  defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_p>;
 
   defm ASR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b00, "asr">;
   defm LSR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b01, "lsr">;
@@ -1328,19 +1331,23 @@
   defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;
 
   let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
-    defm ASR_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<AArch64asr_m1>;
-    defm LSR_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<AArch64lsr_m1>;
-    defm LSL_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<AArch64lsl_m1>;
+    defm ASR_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;
+    defm LSR_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
+    defm LSL_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
     defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>;
   }
 
-  defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", AArch64asr_m1, "ASRR_ZPmZ">;
-  defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", AArch64lsr_m1, "LSRR_ZPmZ">;
-  defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", AArch64lsl_m1, "LSLR_ZPmZ">;
+  defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ">;
+  defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ">;
+  defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ">;
   defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", "ASRR_ZPZZ", null_frag, "ASR_ZPmZ", /*isReverseInstr*/ 1>;
   defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", "LSRR_ZPZZ", null_frag, "LSR_ZPmZ", /*isReverseInstr*/ 1>;
   defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", "LSLR_ZPZZ", null_frag, "LSL_ZPmZ", /*isReverseInstr*/ 1>;
 
+  defm ASR_ZPZZ : sve_int_bin_pred_bhsd<AArch64asr_p>;
+  defm LSR_ZPZZ : sve_int_bin_pred_bhsd<AArch64lsr_p>;
+  defm LSL_ZPZZ : sve_int_bin_pred_bhsd<AArch64lsl_p>;
+
   defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>;
   defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>;
   defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>;
 
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2382,11 +2382,19 @@
   def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm, SDPatternOperator op> {
-  def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>;
-  def _H : sve_int_bin_pred_arit_log<0b01, 0b01, opc, asm, ZPR16>;
-  def _S : sve_int_bin_pred_arit_log<0b10, 0b01, opc, asm, ZPR32>;
-  def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>;
+multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm, string Ps,
+                                   SDPatternOperator op,
+                                   DestructiveInstTypeEnum flags> {
+  let DestructiveInstType = flags in {
+  def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>,
+           SVEPseudo2Instr<Ps # _B, 1>;
+  def _H : sve_int_bin_pred_arit_log<0b01, 0b01, opc, asm, ZPR16>,
+           SVEPseudo2Instr<Ps # _H, 1>;
+  def _S : sve_int_bin_pred_arit_log<0b10, 0b01, opc, asm, ZPR32>,
+           SVEPseudo2Instr<Ps # _S, 1>;
+  def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>,
+           SVEPseudo2Instr<Ps # _D, 1>;
+  }
 
   def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
   def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
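Note (illustration only, not part of the patch): the *_ZPZZ definitions above create untied pseudos, and SVEPseudo2Instr together with the DestructiveBinaryComm flag maps them back onto the real destructive ZPmZ instructions. That lets the destructive-operand decision wait until pseudo expansion, once register assignment is known. A sketch of the possible expansions for a predicated smax, with register choices purely illustrative:

//  dst == op1:   smax z0.s, p0/m, z0.s, z1.s     // plain destructive form
//  dst == op2:   smax z1.s, p0/m, z1.s, z0.s     // operands commuted (smax is commutative)
//  dst is new:   movprfx z2, z0                  // constructive prefix, then
//                smax z2.s, p0/m, z2.s, z1.s

For the non-commutative shifts the same role is played by the reversed instructions (asrr/lsrr/lslr), which is what the new ASR_ZPZZ/LSR_ZPZZ/LSL_ZPZZ pseudos and the tests below exercise.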
diff --git a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
--- a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
+++ b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
@@ -182,7 +182,7 @@
 ; SMIN
 ;
 
-define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: smin_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b
@@ -193,7 +193,7 @@
   ret <vscale x 16 x i8> %min
 }
 
-define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: smin_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
@@ -204,7 +204,7 @@
   ret <vscale x 8 x i16> %min
 }
 
-define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: smin_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
@@ -215,7 +215,7 @@
   ret <vscale x 4 x i32> %min
 }
 
-define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: smin_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
@@ -226,7 +226,7 @@
   ret <vscale x 2 x i64> %min
 }
 
-define <vscale x 32 x i8> @smin_split_i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c) {
+define <vscale x 32 x i8> @smin_split_i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b) {
 ; CHECK-LABEL: smin_split_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b
@@ -238,7 +238,7 @@
   ret <vscale x 32 x i8> %min
 }
 
-define <vscale x 32 x i16> @smin_split_i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c) {
+define <vscale x 32 x i16> @smin_split_i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b) {
 ; CHECK-LABEL: smin_split_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
@@ -252,7 +252,7 @@
   ret <vscale x 32 x i16> %min
 }
 
-define <vscale x 8 x i32> @smin_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c) {
+define <vscale x 8 x i32> @smin_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
 ; CHECK-LABEL: smin_split_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
@@ -264,7 +264,7 @@
   ret <vscale x 8 x i32> %min
 }
 
-define <vscale x 4 x i64> @smin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c) {
+define <vscale x 4 x i64> @smin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
 ; CHECK-LABEL: smin_split_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
@@ -276,7 +276,7 @@
   ret <vscale x 4 x i64> %min
 }
 
-define <vscale x 8 x i8> @smin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) {
+define <vscale x 8 x i8> @smin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
 ; CHECK-LABEL: smin_promote_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
@@ -289,7 +289,7 @@
   ret <vscale x 8 x i8> %min
 }
 
-define <vscale x 4 x i16> @smin_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) {
+define <vscale x 4 x i16> @smin_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b) {
 ; CHECK-LABEL: smin_promote_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
@@ -302,7 +302,7 @@
   ret <vscale x 4 x i16> %min
 }
 
-define <vscale x 2 x i32> @smin_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) {
+define <vscale x 2 x i32> @smin_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: smin_promote_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
@@ -319,7 +319,7 @@
 ; UMIN
 ;
 
-define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: umin_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b
@@ -330,7 +330,7 @@
   ret <vscale x 16 x i8> %min
 }
 
-define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: umin_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
@@ -341,7 +341,7 @@
   ret <vscale x 8 x i16> %min
 }
 
-define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: umin_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
@@ -352,7 +352,7 @@
   ret <vscale x 4 x i32> %min
 }
 
-define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: umin_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
@@ -363,7 +363,7 @@
   ret <vscale x 2 x i64> %min
 }
 
-define <vscale x 4 x i64> @umin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c) {
+define <vscale x 4 x i64> @umin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
 ; CHECK-LABEL: umin_split_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
@@ -375,7 +375,7 @@
   ret <vscale x 4 x i64> %min
 }
 
-define <vscale x 8 x i8> @umin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) {
+define <vscale x 8 x i8> @umin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
 ; CHECK-LABEL: umin_promote_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
@@ -392,7 +392,7 @@
 ; SMAX
 ;
 
-define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: smax_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b
@@ -403,7 +403,7 @@
   ret <vscale x 16 x i8> %max
 }
 
-define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: smax_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
@@ -414,7 +414,7 @@
   ret <vscale x 8 x i16> %max
 }
 
-define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: smax_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
@@ -425,7 +425,7 @@
   ret <vscale x 4 x i32> %max
 }
 
-define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: smax_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
@@ -436,7 +436,7 @@
   ret <vscale x 2 x i64> %max
 }
 
-define <vscale x 8 x i32> @smax_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c) {
+define <vscale x 8 x i32> @smax_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
 ; CHECK-LABEL: smax_split_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
@@ -448,7 +448,7 @@
   ret <vscale x 8 x i32> %max
 }
 
-define <vscale x 4 x i16> @smax_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) {
+define <vscale x 4 x i16> @smax_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b) {
 ; CHECK-LABEL: smax_promote_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
@@ -465,7 +465,7 @@
 ; UMAX
 ;
 
-define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: umax_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b
@@ -476,7 +476,7 @@
   ret <vscale x 16 x i8> %max
 }
 
-define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: umax_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
@@ -487,7 +487,7 @@
   ret <vscale x 8 x i16> %max
 }
 
-define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: umax_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
@@ -498,7 +498,7 @@
   ret <vscale x 4 x i32> %max
 }
 
-define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: umax_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
@@ -509,7 +509,7 @@
   ret <vscale x 2 x i64> %max
 }
 
-define <vscale x 16 x i16> @umax_split_i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c) {
+define <vscale x 16 x i16> @umax_split_i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b) {
 ; CHECK-LABEL: umax_split_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
@@ -521,7 +521,7 @@
   ret <vscale x 16 x i16> %max
 }
 
-define <vscale x 2 x i32> @umax_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) {
+define <vscale x 2 x i32> @umax_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: umax_promote_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
@@ -601,6 +601,50 @@
   ret %shr
 }
 
+;
+; ASRR
+;
+
+define <vscale x 16 x i8> @asrr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
+; CHECK-LABEL: asrr_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    asrr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %shr = ashr <vscale x 16 x i8> %b, %a
+  ret <vscale x 16 x i8> %shr
+}
+
+define <vscale x 8 x i16> @asrr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b){
+; CHECK-LABEL: asrr_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    asrr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %shr = ashr <vscale x 8 x i16> %b, %a
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 4 x i32> @asrr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b){
+; CHECK-LABEL: asrr_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    asrr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %shr = ashr <vscale x 4 x i32> %b, %a
+  ret <vscale x 4 x i32> %shr
+}
+
+define <vscale x 2 x i64> @asrr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: asrr_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    asrr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %shr = ashr <vscale x 2 x i64> %b, %a
+  ret <vscale x 2 x i64> %shr
+}
+
 ;
 ; LSL
 ;
@@ -667,6 +711,50 @@
   ret %shl
 }
 
+;
+; LSLR
+;
+
+define <vscale x 16 x i8> @lslr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
+; CHECK-LABEL: lslr_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    lslr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %shl = shl <vscale x 16 x i8> %b, %a
+  ret <vscale x 16 x i8> %shl
+}
+
+define <vscale x 8 x i16> @lslr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b){
+; CHECK-LABEL: lslr_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    lslr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %shl = shl <vscale x 8 x i16> %b, %a
+  ret <vscale x 8 x i16> %shl
+}
+
+define <vscale x 4 x i32> @lslr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b){
+; CHECK-LABEL: lslr_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    lslr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %shl = shl <vscale x 4 x i32> %b, %a
+  ret <vscale x 4 x i32> %shl
+}
+
+define <vscale x 2 x i64> @lslr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: lslr_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    lslr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %shl = shl <vscale x 2 x i64> %b, %a
+  ret <vscale x 2 x i64> %shl
+}
+
 ;
 ; LSR
 ;
@@ -734,6 +822,50 @@
   ret %shr
 }
 
+;
+; LSRR
+;
+
+define <vscale x 16 x i8> @lsrr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
+; CHECK-LABEL: lsrr_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    lsrr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %shr = lshr <vscale x 16 x i8> %b, %a
+  ret <vscale x 16 x i8> %shr
+}
+
+define <vscale x 8 x i16> @lsrr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b){
+; CHECK-LABEL: lsrr_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    lsrr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %shr = lshr <vscale x 8 x i16> %b, %a
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 4 x i32> @lsrr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b){
+; CHECK-LABEL: lsrr_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    lsrr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %shr = lshr <vscale x 4 x i32> %b, %a
+  ret <vscale x 4 x i32> %shr
+}
+
+define <vscale x 2 x i64> @lsrr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: lsrr_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    lsrr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %shr = lshr <vscale x 2 x i64> %b, %a
+  ret <vscale x 2 x i64> %shr
+}
+
 ;
 ; CMP
 ;
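Note (illustration only, not part of the patch): the new asrr/lslr/lsrr checks cover the non-commutative case where the destination register must end up holding what was the shift amount. For example, in asrr_i32 the IR is %shr = ashr <vscale x 4 x i32> %b, %a, with %a arriving in z0, %b in z1, and the result expected back in z0; the reversed instruction computes the shift with its operands swapped, directly into z0:

;   asrr z0.s, p0/m, z0.s, z1.s     ; z0 = ashr(z1, z0), lanes governed by p0

Without the reversed form the same function would need an extra register move to free up z0 before a regular asr could be used.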