diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -423,6 +423,9 @@
   case AArch64::DestructiveBinaryImm:
     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
     break;
+  case AArch64::ConstructiveUnaryPassthru:
+    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 1, 3);
+    break;
   case AArch64::DestructiveTernaryCommWithRev:
     std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
     if (DstReg == MI.getOperand(3).getReg()) {
@@ -439,7 +442,6 @@
     llvm_unreachable("Unsupported Destructive Operand type");
   }
 
-#ifndef NDEBUG
   // MOVPRFX can only be used if the destination operand
   // is the destructive operand, not as any other operand,
   // so the Destructive Operand must be unique.
@@ -451,6 +453,7 @@
         DstReg != MI.getOperand(DOPIdx).getReg() ||
         MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
     break;
+  case AArch64::ConstructiveUnaryPassthru:
   case AArch64::DestructiveBinaryImm:
     DOPRegIsUnique = true;
     break;
@@ -461,7 +464,6 @@
          MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
     break;
   }
-#endif
 
   // Resolve the reverse opcode
   if (UseRev) {
@@ -504,9 +506,7 @@
   //
   MachineInstrBuilder PRFX, DOP;
   if (FalseZero) {
-#ifndef NDEBUG
     assert(DOPRegIsUnique && "The destructive operand should be unique");
-#endif
     assert(ElementSize != AArch64::ElementSizeNone &&
            "This instruction is unpredicated");
 
@@ -518,14 +518,20 @@
     // After the movprfx, the destructive operand is same as Dst
     DOPIdx = 0;
-  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
-#ifndef NDEBUG
+  } else if (DType != AArch64::ConstructiveUnaryPassthru &&
+             DstReg != MI.getOperand(DOPIdx).getReg()) {
     assert(DOPRegIsUnique && "The destructive operand should be unique");
-#endif
     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
                .addReg(DstReg, RegState::Define)
                .addReg(MI.getOperand(DOPIdx).getReg());
     DOPIdx = 0;
+  } else if (DType == AArch64::ConstructiveUnaryPassthru &&
+             DstReg != MI.getOperand(SrcIdx).getReg()) {
+    assert(DOPRegIsUnique && "The destructive operand should be unique");
+    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
+               .addReg(DstReg, RegState::Define)
+               .addReg(MI.getOperand(SrcIdx).getReg());
+    DOPIdx = 0;
   }
 
   //
@@ -535,6 +541,11 @@
       .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
 
   switch (DType) {
+  case AArch64::ConstructiveUnaryPassthru:
+    DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
+        .add(MI.getOperand(PredIdx))
+        .add(MI.getOperand(SrcIdx));
+    break;
   case AArch64::DestructiveBinaryImm:
   case AArch64::DestructiveBinaryComm:
   case AArch64::DestructiveBinaryCommWithRev:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -36,6 +36,7 @@
 def DestructiveBinaryComm        : DestructiveInstTypeEnum<6>;
 def DestructiveBinaryCommWithRev : DestructiveInstTypeEnum<7>;
 def DestructiveTernaryCommWithRev : DestructiveInstTypeEnum<8>;
+def ConstructiveUnaryPassthru    : DestructiveInstTypeEnum<9>;
 
 class FalseLanesEnum<bits<2> val> {
   bits<2> Value = val;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -482,6 +482,7 @@
   DestructiveBinaryComm =
TSFLAG_DESTRUCTIVE_INST_TYPE(0x6), DestructiveBinaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x7), DestructiveTernaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x8), + ConstructiveUnaryPassthru = TSFLAG_DESTRUCTIVE_INST_TYPE(0x9), }; enum FalseLaneType { diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1701,12 +1701,12 @@ def : Pat<(nxv2i64 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))), (LD1RQ_D_IMM $gp, $base, simm4s16:$imm)>; - def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; - def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (SXTH_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; - def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i8), (SXTB_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; - def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (SXTH_ZPmZ_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>; - def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i8), (SXTB_ZPmZ_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>; - def : Pat<(sext_inreg (nxv8i16 ZPR:$Zs), nxv8i8), (SXTB_ZPmZ_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$Zs)>; + def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; + def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (SXTH_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; + def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i8), (SXTB_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; + def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (SXTH_ZPmZ_UNDEF_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>; + def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i8), (SXTB_ZPmZ_UNDEF_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>; + def : Pat<(sext_inreg (nxv8i16 ZPR:$Zs), nxv8i8), (SXTB_ZPmZ_UNDEF_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$Zs)>; // General case that we ideally never want to match. def : Pat<(vscale GPR64:$scale), (MADDXrrr (UBFMXri (RDVLI_XI 1), 4, 63), $scale, XZR)>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -334,6 +334,15 @@ : Pat<(vtd (op pg:$Op1, vts:$Op2, vtd:$Op3)), (inst $Op3, $Op1, $Op2)>; + +multiclass SVE_1_Op_PassthruUndef_Pat { + def : Pat<(vtd (op pg:$Op1, vts:$Op2, (vtd undef))), + (inst (IMPLICIT_DEF), $Op1, $Op2)>; + def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, vtd:$Op3)), + (inst $Op3, $Op1, $Op2)>; +} + // Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the // type of rounding. This is matched by timm0_1 in pattern below and ignored. 
class SVE_1_Op_Passthru_Round_Pat; +multiclass SVE_3_Op_Undef_Pat { + def : Pat<(vtd (op (vt1 undef), vt2:$Op1, vt3:$Op2)), + (inst (IMPLICIT_DEF), $Op1, $Op2)>; + def : Pat<(vtd (op vt1:$Op1, (vt2 (SVEAllActive:$Op2)), vt3:$Op3)), + (inst $Op1, $Op2, $Op3)>; +} + class SVE_4_Op_Pat @@ -441,6 +458,14 @@ : Pat<(vt (op pt:$Pg, vt:$Src, inreg_vt, vt:$PassThru)), (inst $PassThru, $Pg, $Src)>; +multiclass SVE_InReg_Extend_PassthruUndef { + def : Pat<(vt (op pt:$Pg, vt:$Src, inreg_vt, (vt undef))), + (inst (IMPLICIT_DEF), $Pg, $Src)>; + def : Pat<(vt (op (pt (SVEAllActive:$Pg)), vt:$Src, inreg_vt, vt:$PassThru)), + (inst $PassThru, $Pg, $Src)>; +} + class SVE_Shift_DupImm_Pred_Pat @@ -518,6 +543,15 @@ } } +// +// Pseudos for passthru operands +// +let hasNoSchedulingInfo = 1 in { + class PredOneOpPassthruPseudo + : SVEPseudo2Instr, + Pseudo<(outs zprty:$Zd), (ins zprty:$Passthru, PPR3bAny:$Pg, zprty:$Zs), []>; +} + //===----------------------------------------------------------------------===// // SVE Predicate Misc Group //===----------------------------------------------------------------------===// @@ -3246,26 +3280,46 @@ let Inst{4-0} = Zd; let Constraints = "$Zd = $_Zd"; - let DestructiveInstType = DestructiveOther; + let DestructiveInstType = ConstructiveUnaryPassthru; let ElementSize = zprty.ElementSize; } multiclass sve2_int_un_pred_arit_s opc, string asm, SDPatternOperator op> { - def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>; + def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>, + SVEPseudo2Instr; + def : SVE_3_Op_Pat(NAME # _S)>; + + def _UNDEF_S : PredOneOpPassthruPseudo; + + defm : SVE_3_Op_Undef_Pat(NAME # _UNDEF_S)>; } multiclass sve2_int_un_pred_arit opc, string asm, SDPatternOperator op> { - def _B : sve2_int_un_pred_arit<0b00, opc{2}, opc{1-0}, asm, ZPR8>; - def _H : sve2_int_un_pred_arit<0b01, opc{2}, opc{1-0}, asm, ZPR16>; - def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>; - def _D : sve2_int_un_pred_arit<0b11, opc{2}, opc{1-0}, asm, ZPR64>; + def _B : sve2_int_un_pred_arit<0b00, opc{2}, opc{1-0}, asm, ZPR8>, + SVEPseudo2Instr; + def _H : sve2_int_un_pred_arit<0b01, opc{2}, opc{1-0}, asm, ZPR16>, + SVEPseudo2Instr; + def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>, + SVEPseudo2Instr; + def _D : sve2_int_un_pred_arit<0b11, opc{2}, opc{1-0}, asm, ZPR64>, + SVEPseudo2Instr; def : SVE_3_Op_Pat(NAME # _B)>; def : SVE_3_Op_Pat(NAME # _H)>; def : SVE_3_Op_Pat(NAME # _S)>; def : SVE_3_Op_Pat(NAME # _D)>; + + def _UNDEF_B : PredOneOpPassthruPseudo; + def _UNDEF_H : PredOneOpPassthruPseudo; + def _UNDEF_S : PredOneOpPassthruPseudo; + def _UNDEF_D : PredOneOpPassthruPseudo; + + defm : SVE_3_Op_Undef_Pat(NAME # _UNDEF_B)>; + defm : SVE_3_Op_Undef_Pat(NAME # _UNDEF_H)>; + defm : SVE_3_Op_Undef_Pat(NAME # _UNDEF_S)>; + defm : SVE_3_Op_Undef_Pat(NAME # _UNDEF_D)>; } //===----------------------------------------------------------------------===// @@ -3866,67 +3920,122 @@ let Inst{4-0} = Zd; let Constraints = "$Zd = $_Zd"; - let DestructiveInstType = DestructiveOther; + let DestructiveInstType = ConstructiveUnaryPassthru; let ElementSize = zprty.ElementSize; } multiclass sve_int_un_pred_arit_0 opc, string asm, SDPatternOperator op> { - def _B : sve_int_un_pred_arit<0b00, { opc, 0b0 }, asm, ZPR8>; - def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>; - def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>; - def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; + def _B : 
sve_int_un_pred_arit<0b00, { opc, 0b0 }, asm, ZPR8>, + SVEPseudo2Instr; + def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>, + SVEPseudo2Instr; + def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>, + SVEPseudo2Instr; + def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>, + SVEPseudo2Instr; def : SVE_1_Op_Passthru_Pat(NAME # _B)>; def : SVE_1_Op_Passthru_Pat(NAME # _H)>; def : SVE_1_Op_Passthru_Pat(NAME # _S)>; def : SVE_1_Op_Passthru_Pat(NAME # _D)>; + + def _UNDEF_B : PredOneOpPassthruPseudo; + def _UNDEF_H : PredOneOpPassthruPseudo; + def _UNDEF_S : PredOneOpPassthruPseudo; + def _UNDEF_D : PredOneOpPassthruPseudo; + + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_B)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_S)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_D)>; } multiclass sve_int_un_pred_arit_0_h opc, string asm, SDPatternOperator op> { - def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>; - def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>; - def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; + def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>, + SVEPseudo2Instr; + def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>, + SVEPseudo2Instr; + def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>, + SVEPseudo2Instr; def : SVE_InReg_Extend(NAME # _H)>; def : SVE_InReg_Extend(NAME # _S)>; def : SVE_InReg_Extend(NAME # _D)>; + + def _UNDEF_H : PredOneOpPassthruPseudo; + def _UNDEF_S : PredOneOpPassthruPseudo; + def _UNDEF_D : PredOneOpPassthruPseudo; + + defm : SVE_InReg_Extend_PassthruUndef(NAME # _UNDEF_H)>; + defm : SVE_InReg_Extend_PassthruUndef(NAME # _UNDEF_S)>; + defm : SVE_InReg_Extend_PassthruUndef(NAME # _UNDEF_D)>; } multiclass sve_int_un_pred_arit_0_w opc, string asm, SDPatternOperator op> { - def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>; - def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; + def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>, + SVEPseudo2Instr; + def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>, + SVEPseudo2Instr; def : SVE_InReg_Extend(NAME # _S)>; def : SVE_InReg_Extend(NAME # _D)>; + + def _UNDEF_S : PredOneOpPassthruPseudo; + def _UNDEF_D : PredOneOpPassthruPseudo; + + defm : SVE_InReg_Extend_PassthruUndef(NAME # _UNDEF_S)>; + defm : SVE_InReg_Extend_PassthruUndef(NAME # _UNDEF_D)>; } multiclass sve_int_un_pred_arit_0_d opc, string asm, SDPatternOperator op> { - def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; + def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>, + SVEPseudo2Instr; def : SVE_InReg_Extend(NAME # _D)>; + + def _UNDEF_D : PredOneOpPassthruPseudo; + + defm : SVE_InReg_Extend_PassthruUndef(NAME # _UNDEF_D)>; } multiclass sve_int_un_pred_arit_1 opc, string asm, SDPatternOperator op> { - def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>; - def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>; - def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>; - def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>; + def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>, + SVEPseudo2Instr; + def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>, + SVEPseudo2Instr; + def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>, + SVEPseudo2Instr; + def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>, + SVEPseudo2Instr; def : SVE_1_Op_Passthru_Pat(NAME # 
_B)>; def : SVE_1_Op_Passthru_Pat(NAME # _H)>; def : SVE_1_Op_Passthru_Pat(NAME # _S)>; def : SVE_1_Op_Passthru_Pat(NAME # _D)>; + + def _UNDEF_B : PredOneOpPassthruPseudo; + def _UNDEF_H : PredOneOpPassthruPseudo; + def _UNDEF_S : PredOneOpPassthruPseudo; + def _UNDEF_D : PredOneOpPassthruPseudo; + + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_B)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_S)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_D)>; } multiclass sve_int_un_pred_arit_1_fp opc, string asm, SDPatternOperator op> { - def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>; - def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>; - def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>; + def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>, + SVEPseudo2Instr; + def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>, + SVEPseudo2Instr; + def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>, + SVEPseudo2Instr; def : SVE_1_Op_Passthru_Pat(NAME # _H)>; def : SVE_1_Op_Passthru_Pat(NAME # _H)>; @@ -3934,6 +4043,17 @@ def : SVE_1_Op_Passthru_Pat(NAME # _S)>; def : SVE_1_Op_Passthru_Pat(NAME # _S)>; def : SVE_1_Op_Passthru_Pat(NAME # _D)>; + + def _UNDEF_H : PredOneOpPassthruPseudo; + def _UNDEF_S : PredOneOpPassthruPseudo; + def _UNDEF_D : PredOneOpPassthruPseudo; + + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_H)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_S)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_S)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _UNDEF_D)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll b/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll --- a/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll +++ b/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll @@ -33,6 +33,7 @@ ; CHECK-LABEL: sti32ldi32ext: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: sxtw z1.d, p0/m, z0.d ; CHECK-NEXT: st1w { z0.d }, p0, [x0] ; CHECK-NEXT: mov z0.d, z1.d diff --git a/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll b/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll --- a/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll +++ b/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll @@ -32,6 +32,7 @@ ; CHECK-LABEL: no_dag_combine_sext ; CHECK: ld1b { z1.d }, p0/z, [z0.d, #16] ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 ; CHECK-NEXT: sxtb z0.d, p0/m, z1.d ; CHECK-NEXT: st1b { z1.d }, p1, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll --- a/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll @@ -9,6 +9,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d] ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: sxtb z2.d, p0/m, z0.d ; CHECK-NEXT: add z0.d, z0.d, z1.d ; CHECK-NEXT: sxtb z0.d, p0/m, z0.d diff --git a/llvm/test/CodeGen/AArch64/sve-unary-movprfx.ll b/llvm/test/CodeGen/AArch64/sve-unary-movprfx.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-unary-movprfx.ll @@ -0,0 +1,2612 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; +; ABS +; + +define @abs_i8( %a, %b) #0 { +; CHECK-LABEL: abs_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: abs z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %ret = tail call @llvm.abs.nxv16i8( %b, i1 0) + ret %ret +} + +define @abs_i8_dupreg( %a) #0 { +; CHECK-LABEL: abs_i8_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: abs z0.b, p0/m, z0.b +; CHECK-NEXT: ret + %ret = tail call @llvm.abs.nxv16i8( %a, i1 0) + ret %ret +} + +define @abs_i8_undef( %a, %b) #0 { +; CHECK-LABEL: abs_i8_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: abs z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.abs.nxv16i8( undef, %pg, %b) + ret %ret +} + +define @abs_i8_active( %a, %b) #0 { +; CHECK-LABEL: abs_i8_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: abs z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.abs.nxv16i8( %a, %pg, %b) + ret %ret +} + +define @abs_i8_not_active( %a, %b) #0 { +; CHECK-LABEL: abs_i8_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: abs z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %ret = tail call @llvm.aarch64.sve.abs.nxv16i8( %a, %pg.to, %b) + ret %ret +} + +define @abs_i16( %a, %b) #0 { +; CHECK-LABEL: abs_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: abs z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %ret = tail call @llvm.abs.nxv8i16( %b, i1 0) + ret %ret +} + +define @abs_i16_dupreg( %a) #0 { +; CHECK-LABEL: abs_i16_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: abs z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %ret = tail call @llvm.abs.nxv8i16( %a, i1 0) + ret %ret +} + +define @abs_i16_undef( %a, %b) #0 { +; CHECK-LABEL: abs_i16_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: abs z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.abs.nxv8i16( undef, %pg, %b) + ret %ret +} + +define @abs_i16_active( %a, %b) #0 { +; CHECK-LABEL: abs_i16_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: abs z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.abs.nxv8i16( %a, %pg, %b) + ret %ret +} + +define @abs_i16_not_active( %a, %b) #0 { +; CHECK-LABEL: abs_i16_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: abs z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.abs.nxv8i16( %a, %pg.from, %b) + ret %ret +} + +define @abs_i32( %a, %b) #0 { +; CHECK-LABEL: abs_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: abs z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %ret = tail call @llvm.abs.nxv4i32( %b, i1 0) + ret %ret +} + +define 
@abs_i32_dupreg( %a) #0 { +; CHECK-LABEL: abs_i32_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: abs z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %ret = tail call @llvm.abs.nxv4i32( %a, i1 0) + ret %ret +} + +define @abs_i32_undef( %a, %b) #0 { +; CHECK-LABEL: abs_i32_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: abs z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.abs.nxv4i32( undef, %pg, %b) + ret %ret +} + +define @abs_i32_active( %a, %b) #0 { +; CHECK-LABEL: abs_i32_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: abs z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.abs.nxv4i32( %a, %pg, %b) + ret %ret +} + +define @abs_i32_not_active( %a, %b) #0 { +; CHECK-LABEL: abs_i32_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: abs z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.abs.nxv4i32( %a, %pg.from, %b) + ret %ret +} + +define @abs_i64( %a, %b) #0 { +; CHECK-LABEL: abs_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: abs z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.abs.nxv2i64( %b, i1 0) + ret %ret +} + +define @abs_i64_dupreg( %a) #0 { +; CHECK-LABEL: abs_i64_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: abs z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %ret = tail call @llvm.abs.nxv2i64( %a, i1 0) + ret %ret +} + +define @abs_i64_undef( %a, %b) #0 { +; CHECK-LABEL: abs_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: abs z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.abs.nxv2i64( undef, %pg, %b) + ret %ret +} + +define @abs_i64_active( %a, %b) #0 { +; CHECK-LABEL: abs_i64_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: abs z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.abs.nxv2i64( %a, %pg, %b) + ret %ret +} + +define @abs_i64_not_active( %a, %b, %pg) #0 { +; CHECK-LABEL: abs_i64_not_active: +; CHECK: // %bb.0: +; CHECK: abs z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.aarch64.sve.abs.nxv2i64( %a, %pg, %b) + ret %ret +} + +; +; NEG +; + +define @neg_i8_dupreg( %a) #0 { +; CHECK-LABEL: neg_i8_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: neg z0.b, p0/m, z0.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.neg.nxv16i8( undef, %pg, %a) + ret %ret +} + +define @neg_i8_undef( %a, %b) #0 { +; CHECK-LABEL: neg_i8_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: neg z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.neg.nxv16i8( undef, %pg, %b) + ret %ret +} + +define @neg_i8_active( %a, %b) #0 { +; CHECK-LABEL: neg_i8_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 
+; CHECK-NEXT: neg z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.neg.nxv16i8( %a, %pg, %b) + ret %ret +} + +define @neg_i8_not_active( %a, %b) #0 { +; CHECK-LABEL: neg_i8_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: neg z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %ret = tail call @llvm.aarch64.sve.neg.nxv16i8( %a, %pg.to, %b) + ret %ret +} + +define @neg_i16_dupreg( %a) #0 { +; CHECK-LABEL: neg_i16_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: neg z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.neg.nxv8i16( undef, %pg, %a) + ret %ret +} + +define @neg_i16_undef( %a, %b) #0 { +; CHECK-LABEL: neg_i16_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: neg z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.neg.nxv8i16( undef, %pg, %b) + ret %ret +} + +define @neg_i16_active( %a, %b) #0 { +; CHECK-LABEL: neg_i16_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: neg z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.neg.nxv8i16( %a, %pg, %b) + ret %ret +} + +define @neg_i16_not_active( %a, %b) #0 { +; CHECK-LABEL: neg_i16_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: neg z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.neg.nxv8i16( %a, %pg.from, %b) + ret %ret +} + +define @neg_i32_dupreg( %a) #0 { +; CHECK-LABEL: neg_i32_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: neg z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.neg.nxv4i32( undef, %pg, %a) + ret %ret +} + +define @neg_i32_undef( %a, %b) #0 { +; CHECK-LABEL: neg_i32_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: neg z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.neg.nxv4i32( undef, %pg, %b) + ret %ret +} + +define @neg_i32_active( %a, %b) #0 { +; CHECK-LABEL: neg_i32_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: neg z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.neg.nxv4i32( %a, %pg, %b) + ret %ret +} + +define @neg_i32_not_active( %a, %b) #0 { +; CHECK-LABEL: neg_i32_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: neg z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.neg.nxv4i32( %a, %pg.from, %b) + ret %ret +} + +define @neg_i64_dupreg( %a) #0 { +; CHECK-LABEL: neg_i64_dupreg: +; CHECK: // 
%bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: neg z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.neg.nxv2i64( undef, %pg, %a) + ret %ret +} + +define @neg_i64_undef( %a, %b) #0 { +; CHECK-LABEL: neg_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: neg z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.neg.nxv2i64( undef, %pg, %b) + ret %ret +} + +define @neg_i64_active( %a, %b) #0 { +; CHECK-LABEL: neg_i64_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: neg z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.neg.nxv2i64( %a, %pg, %b) + ret %ret +} + +define @neg_i64_not_active( %a, %b, %pg) #0 { +; CHECK-LABEL: neg_i64_not_active: +; CHECK: // %bb.0: +; CHECK: neg z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.aarch64.sve.neg.nxv2i64( %a, %pg, %b) + ret %ret +} + +; +; CLS +; + +define @cls_i8_dupreg( %a) #0 { +; CHECK-LABEL: cls_i8_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: cls z0.b, p0/m, z0.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cls.nxv16i8( undef, %pg, %a) + ret %ret +} + +define @cls_i8_undef( %a, %b) #0 { +; CHECK-LABEL: cls_i8_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cls z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cls.nxv16i8( undef, %pg, %b) + ret %ret +} + +define @cls_i8_active( %a, %b) #0 { +; CHECK-LABEL: cls_i8_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cls z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cls.nxv16i8( %a, %pg, %b) + ret %ret +} + +define @cls_i8_not_active( %a, %b) #0 { +; CHECK-LABEL: cls_i8_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: cls z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %ret = tail call @llvm.aarch64.sve.cls.nxv16i8( %a, %pg.to, %b) + ret %ret +} + +define @cls_i16_dupreg( %a) #0 { +; CHECK-LABEL: cls_i16_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: cls z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cls.nxv8i16( undef, %pg, %a) + ret %ret +} + +define @cls_i16_undef( %a, %b) #0 { +; CHECK-LABEL: cls_i16_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cls z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cls.nxv8i16( undef, %pg, %b) + ret %ret +} + +define @cls_i16_active( %a, %b) #0 { +; CHECK-LABEL: cls_i16_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cls z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cls.nxv8i16( %a, %pg, %b) + ret %ret +} + +define @cls_i16_not_active( %a, %b) #0 { +; CHECK-LABEL: 
cls_i16_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: cls z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.cls.nxv8i16( %a, %pg.from, %b) + ret %ret +} + +define @cls_i32_dupreg( %a) #0 { +; CHECK-LABEL: cls_i32_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: cls z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cls.nxv4i32( undef, %pg, %a) + ret %ret +} + +define @cls_i32_undef( %a, %b) #0 { +; CHECK-LABEL: cls_i32_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cls z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cls.nxv4i32( undef, %pg, %b) + ret %ret +} + +define @cls_i32_active( %a, %b) #0 { +; CHECK-LABEL: cls_i32_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cls z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cls.nxv4i32( %a, %pg, %b) + ret %ret +} + +define @cls_i32_not_active( %a, %b) #0 { +; CHECK-LABEL: cls_i32_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: cls z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.cls.nxv4i32( %a, %pg.from, %b) + ret %ret +} + +define @cls_i64_dupreg( %a) #0 { +; CHECK-LABEL: cls_i64_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: cls z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cls.nxv2i64( undef, %pg, %a) + ret %ret +} + +define @cls_i64_undef( %a, %b) #0 { +; CHECK-LABEL: cls_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cls z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cls.nxv2i64( undef, %pg, %b) + ret %ret +} + +define @cls_i64_active( %a, %b) #0 { +; CHECK-LABEL: cls_i64_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cls z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cls.nxv2i64( %a, %pg, %b) + ret %ret +} + +define @cls_i64_not_active( %a, %b, %pg) #0 { +; CHECK-LABEL: cls_i64_not_active: +; CHECK: // %bb.0: +; CHECK: cls z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.aarch64.sve.cls.nxv2i64( %a, %pg, %b) + ret %ret +} + +; +; CLZ +; + +define @clz_i8( %a, %b) #0 { +; CHECK-LABEL: clz_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: clz z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %ret = tail call @llvm.ctlz.nxv16i8( %b, i1 0) + ret %ret +} + +define @clz_i8_dupreg( %a) #0 { +; CHECK-LABEL: clz_i8_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: clz z0.b, p0/m, z0.b +; CHECK-NEXT: ret + %ret = tail call @llvm.ctlz.nxv16i8( %a, i1 0) + ret 
%ret +} + +define @clz_i8_undef( %a, %b) #0 { +; CHECK-LABEL: clz_i8_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: clz z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.clz.nxv16i8( undef, %pg, %b) + ret %ret +} + +define @clz_i8_active( %a, %b) #0 { +; CHECK-LABEL: clz_i8_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: clz z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.clz.nxv16i8( %a, %pg, %b) + ret %ret +} + +define @clz_i8_not_active( %a, %b) #0 { +; CHECK-LABEL: clz_i8_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: clz z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %ret = tail call @llvm.aarch64.sve.clz.nxv16i8( %a, %pg.to, %b) + ret %ret +} + +define @clz_i16( %a, %b) #0 { +; CHECK-LABEL: clz_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: clz z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %ret = tail call @llvm.ctlz.nxv8i16( %b, i1 0) + ret %ret +} + +define @clz_i16_dupreg( %a) #0 { +; CHECK-LABEL: clz_i16_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: clz z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %ret = tail call @llvm.ctlz.nxv8i16( %a, i1 0) + ret %ret +} + +define @clz_i16_undef( %a, %b) #0 { +; CHECK-LABEL: clz_i16_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: clz z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.clz.nxv8i16( undef, %pg, %b) + ret %ret +} + +define @clz_i16_active( %a, %b) #0 { +; CHECK-LABEL: clz_i16_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: clz z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.clz.nxv8i16( %a, %pg, %b) + ret %ret +} + +define @clz_i16_not_active( %a, %b) #0 { +; CHECK-LABEL: clz_i16_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: clz z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.clz.nxv8i16( %a, %pg.from, %b) + ret %ret +} + +define @clz_i32( %a, %b) #0 { +; CHECK-LABEL: clz_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: clz z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %ret = tail call @llvm.ctlz.nxv4i32( %b, i1 0) + ret %ret +} + +define @clz_i32_dupreg( %a) #0 { +; CHECK-LABEL: clz_i32_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: clz z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %ret = tail call @llvm.ctlz.nxv4i32( %a, i1 0) + ret %ret +} + +define @clz_i32_undef( %a, %b) #0 { +; CHECK-LABEL: clz_i32_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: clz z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.clz.nxv4i32( undef, %pg, %b) + ret %ret +} + +define @clz_i32_active( %a, %b) #0 { 
+; CHECK-LABEL: clz_i32_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: clz z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.clz.nxv4i32( %a, %pg, %b) + ret %ret +} + +define @clz_i32_not_active( %a, %b) #0 { +; CHECK-LABEL: clz_i32_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: clz z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.clz.nxv4i32( %a, %pg.from, %b) + ret %ret +} + +define @clz_i64( %a, %b) #0 { +; CHECK-LABEL: clz_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: clz z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.ctlz.nxv2i64( %b, i1 0) + ret %ret +} + +define @clz_i64_dupreg( %a) #0 { +; CHECK-LABEL: clz_i64_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: clz z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %ret = tail call @llvm.ctlz.nxv2i64( %a, i1 0) + ret %ret +} + +define @clz_i64_undef( %a, %b) #0 { +; CHECK-LABEL: clz_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: clz z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.clz.nxv2i64( undef, %pg, %b) + ret %ret +} + +define @clz_i64_active( %a, %b) #0 { +; CHECK-LABEL: clz_i64_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: clz z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.clz.nxv2i64( %a, %pg, %b) + ret %ret +} + +define @clz_i64_not_active( %a, %b, %pg) #0 { +; CHECK-LABEL: clz_i64_not_active: +; CHECK: // %bb.0: +; CHECK: clz z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.aarch64.sve.clz.nxv2i64( %a, %pg, %b) + ret %ret +} + +; +; CNOT +; + +define @cnot_i8_dupreg( %a) #0 { +; CHECK-LABEL: cnot_i8_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: cnot z0.b, p0/m, z0.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnot.nxv16i8( undef, %pg, %a) + ret %ret +} + +define @cnot_i8_undef( %a, %b) #0 { +; CHECK-LABEL: cnot_i8_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnot z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnot.nxv16i8( undef, %pg, %b) + ret %ret +} + +define @cnot_i8_active( %a, %b) #0 { +; CHECK-LABEL: cnot_i8_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnot z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnot.nxv16i8( %a, %pg, %b) + ret %ret +} + +define @cnot_i8_not_active( %a, %b) #0 { +; CHECK-LABEL: cnot_i8_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: cnot z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %ret = tail call @llvm.aarch64.sve.cnot.nxv16i8( %a, %pg.to, %b) + ret %ret +} + 
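+; The suffixed tests above and below follow a common pattern:
+;   _dupreg     - the source value is already in the destination register, so
+;                 no movprfx is expected
+;   _undef      - the passthru operand is undef, allowing an unpredicated
+;                 movprfx from the source register
+;   _active     - the governing predicate is an all-active ptrue, which
+;                 likewise allows the unpredicated movprfx
+;   _not_active - the predicate is not known to be all active, so the passthru
+;                 value must be preserved and no movprfx is expected
+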
+define @cnot_i16_dupreg( %a) #0 { +; CHECK-LABEL: cnot_i16_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: cnot z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnot.nxv8i16( undef, %pg, %a) + ret %ret +} + +define @cnot_i16_undef( %a, %b) #0 { +; CHECK-LABEL: cnot_i16_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnot z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnot.nxv8i16( undef, %pg, %b) + ret %ret +} + +define @cnot_i16_active( %a, %b) #0 { +; CHECK-LABEL: cnot_i16_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnot z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnot.nxv8i16( %a, %pg, %b) + ret %ret +} + +define @cnot_i16_not_active( %a, %b) #0 { +; CHECK-LABEL: cnot_i16_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: cnot z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.cnot.nxv8i16( %a, %pg.from, %b) + ret %ret +} + +define @cnot_i32_dupreg( %a) #0 { +; CHECK-LABEL: cnot_i32_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: cnot z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnot.nxv4i32( undef, %pg, %a) + ret %ret +} + +define @cnot_i32_undef( %a, %b) #0 { +; CHECK-LABEL: cnot_i32_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnot z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnot.nxv4i32( undef, %pg, %b) + ret %ret +} + +define @cnot_i32_active( %a, %b) #0 { +; CHECK-LABEL: cnot_i32_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnot z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnot.nxv4i32( %a, %pg, %b) + ret %ret +} + +define @cnot_i32_not_active( %a, %b) #0 { +; CHECK-LABEL: cnot_i32_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: cnot z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.cnot.nxv4i32( %a, %pg.from, %b) + ret %ret +} + +define @cnot_i64_dupreg( %a) #0 { +; CHECK-LABEL: cnot_i64_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: cnot z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnot.nxv2i64( undef, %pg, %a) + ret %ret +} + +define @cnot_i64_undef( %a, %b) #0 { +; CHECK-LABEL: cnot_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnot z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnot.nxv2i64( undef, 
%pg, %b) + ret %ret +} + +define @cnot_i64_active( %a, %b) #0 { +; CHECK-LABEL: cnot_i64_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnot z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnot.nxv2i64( %a, %pg, %b) + ret %ret +} + +define @cnot_i64_not_active( %a, %b, %pg) #0 { +; CHECK-LABEL: cnot_i64_not_active: +; CHECK: // %bb.0: +; CHECK: cnot z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.aarch64.sve.cnot.nxv2i64( %a, %pg, %b) + ret %ret +} + +; +; NOT +; + +define @not_i8_dupreg( %a) #0 { +; CHECK-LABEL: not_i8_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: not z0.b, p0/m, z0.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.not.nxv16i8( undef, %pg, %a) + ret %ret +} + +define @not_i8_undef( %a, %b) #0 { +; CHECK-LABEL: not_i8_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: not z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.not.nxv16i8( undef, %pg, %b) + ret %ret +} + +define @not_i8_active( %a, %b) #0 { +; CHECK-LABEL: not_i8_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: not z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.not.nxv16i8( %a, %pg, %b) + ret %ret +} + +define @not_i8_not_active( %a, %b) #0 { +; CHECK-LABEL: not_i8_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: not z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %ret = tail call @llvm.aarch64.sve.not.nxv16i8( %a, %pg.to, %b) + ret %ret +} + +define @not_i16_dupreg( %a) #0 { +; CHECK-LABEL: not_i16_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: not z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.not.nxv8i16( undef, %pg, %a) + ret %ret +} + +define @not_i16_undef( %a, %b) #0 { +; CHECK-LABEL: not_i16_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: not z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.not.nxv8i16( undef, %pg, %b) + ret %ret +} + +define @not_i16_active( %a, %b) #0 { +; CHECK-LABEL: not_i16_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: not z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.not.nxv8i16( %a, %pg, %b) + ret %ret +} + +define @not_i16_not_active( %a, %b) #0 { +; CHECK-LABEL: not_i16_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: not z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.not.nxv8i16( %a, %pg.from, %b) + ret %ret +} + +define @not_i32_dupreg( %a) #0 { +; CHECK-LABEL: not_i32_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; 
CHECK-NEXT: not z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.not.nxv4i32( undef, %pg, %a) + ret %ret +} + +define @not_i32_undef( %a, %b) #0 { +; CHECK-LABEL: not_i32_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: not z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.not.nxv4i32( undef, %pg, %b) + ret %ret +} + +define @not_i32_active( %a, %b) #0 { +; CHECK-LABEL: not_i32_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: not z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.not.nxv4i32( %a, %pg, %b) + ret %ret +} + +define @not_i32_not_active( %a, %b) #0 { +; CHECK-LABEL: not_i32_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: not z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.not.nxv4i32( %a, %pg.from, %b) + ret %ret +} + +define @not_i64_dupreg( %a) #0 { +; CHECK-LABEL: not_i64_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: not z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.not.nxv2i64( undef, %pg, %a) + ret %ret +} + +define @not_i64_undef( %a, %b) #0 { +; CHECK-LABEL: not_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: not z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.not.nxv2i64( undef, %pg, %b) + ret %ret +} + +define @not_i64_active( %a, %b) #0 { +; CHECK-LABEL: not_i64_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: not z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.not.nxv2i64( %a, %pg, %b) + ret %ret +} + +define @not_i64_not_active( %a, %b, %pg) #0 { +; CHECK-LABEL: not_i64_not_active: +; CHECK: // %bb.0: +; CHECK: not z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.aarch64.sve.not.nxv2i64( %a, %pg, %b) + ret %ret +} + +; +; CNT +; + +define @cnt_i8( %a, %b) #0 { +; CHECK-LABEL: cnt_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnt z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %ret = tail call @llvm.ctpop.nxv16i8( %b) + ret %ret +} + +define @cnt_i8_dupreg( %a) #0 { +; CHECK-LABEL: cnt_i8_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: cnt z0.b, p0/m, z0.b +; CHECK-NEXT: ret + %ret = tail call @llvm.ctpop.nxv16i8( %a) + ret %ret +} + +define @cnt_i8_undef( %a, %b) #0 { +; CHECK-LABEL: cnt_i8_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnt z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnt.nxv16i8( undef, %pg, %b) + ret %ret +} + +define @cnt_i8_active( %a, %b) #0 { +; CHECK-LABEL: cnt_i8_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnt z0.b, p0/m, z1.b +; 
CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnt.nxv16i8( %a, %pg, %b) + ret %ret +} + +define @cnt_i8_not_active( %a, %b) #0 { +; CHECK-LABEL: cnt_i8_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: cnt z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %ret = tail call @llvm.aarch64.sve.cnt.nxv16i8( %a, %pg.to, %b) + ret %ret +} + +define @cnt_i16( %a, %b) #0 { +; CHECK-LABEL: cnt_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnt z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %ret = tail call @llvm.ctpop.nxv8i16( %b) + ret %ret +} + +define @cnt_i16_dupreg( %a) #0 { +; CHECK-LABEL: cnt_i16_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: cnt z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %ret = tail call @llvm.ctpop.nxv8i16( %a) + ret %ret +} + +define @cnt_i16_undef( %a, %b) #0 { +; CHECK-LABEL: cnt_i16_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnt z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnt.nxv8i16( undef, %pg, %b) + ret %ret +} + +define @cnt_i16_active( %a, %b) #0 { +; CHECK-LABEL: cnt_i16_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnt z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnt.nxv8i16( %a, %pg, %b) + ret %ret +} + +define @cnt_i16_not_active( %a, %b) #0 { +; CHECK-LABEL: cnt_i16_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: cnt z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.cnt.nxv8i16( %a, %pg.from, %b) + ret %ret +} + +define @cnt_i32( %a, %b) #0 { +; CHECK-LABEL: cnt_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnt z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %ret = tail call @llvm.ctpop.nxv4i32( %b) + ret %ret +} + +define @cnt_i32_dupreg( %a) #0 { +; CHECK-LABEL: cnt_i32_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: cnt z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %ret = tail call @llvm.ctpop.nxv4i32( %a) + ret %ret +} + +define @cnt_i32_undef( %a, %b) #0 { +; CHECK-LABEL: cnt_i32_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnt z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnt.nxv4i32( undef, %pg, %b) + ret %ret +} + +define @cnt_i32_active( %a, %b) #0 { +; CHECK-LABEL: cnt_i32_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnt z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnt.nxv4i32( %a, %pg, %b) + ret %ret +} + +define @cnt_i32_not_active( %a, %b) #0 { +; CHECK-LABEL: cnt_i32_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: cnt z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = 
tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.cnt.nxv4i32( %a, %pg.from, %b) + ret %ret +} + +define @cnt_i64( %a, %b) #0 { +; CHECK-LABEL: cnt_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnt z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.ctpop.nxv2i64( %b) + ret %ret +} + +define @cnt_i64_dupreg( %a) #0 { +; CHECK-LABEL: cnt_i64_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: cnt z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %ret = tail call @llvm.ctpop.nxv2i64( %a) + ret %ret +} + +define @cnt_i64_undef( %a, %b) #0 { +; CHECK-LABEL: cnt_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnt z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnt.nxv2i64( undef, %pg, %b) + ret %ret +} + +define @cnt_i64_active( %a, %b) #0 { +; CHECK-LABEL: cnt_i64_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: cnt z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.cnt.nxv2i64( %a, %pg, %b) + ret %ret +} + +define @cnt_i64_not_active( %a, %b, %pg) #0 { +; CHECK-LABEL: cnt_i64_not_active: +; CHECK: // %bb.0: +; CHECK: cnt z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.aarch64.sve.cnt.nxv2i64( %a, %pg, %b) + ret %ret +} + +; +; FABS +; + +define @fabs_f16( %a, %b) #0 { +; CHECK-LABEL: fabs_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fabs z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %ret = tail call @llvm.fabs.nxv8f16( %b) + ret %ret +} + +define @fabs_f16_dupreg( %a) #0 { +; CHECK-LABEL: fabs_f16_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fabs z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %ret = tail call @llvm.fabs.nxv8f16( %a) + ret %ret +} + +define @fabs_f16_undef( %a, %b) #0 { +; CHECK-LABEL: fabs_f16_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fabs z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.fabs.nxv8f16( undef, %pg, %b) + ret %ret +} + +define @fabs_f16_active( %a, %b) #0 { +; CHECK-LABEL: fabs_f16_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fabs z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.fabs.nxv8f16( %a, %pg, %b) + ret %ret +} + +define @fabs_f16_not_active( %a, %b) #0 { +; CHECK-LABEL: fabs_f16_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fabs z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.fabs.nxv8f16( %a, %pg.from, %b) + ret %ret +} + +define @fabs_f32( %a, %b) #0 { +; CHECK-LABEL: fabs_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fabs z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %ret = tail call @llvm.fabs.nxv4f32( %b) + ret %ret +} + +define @fabs_f32_dupreg( %a) #0 { +; CHECK-LABEL: 
fabs_f32_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fabs z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %ret = tail call @llvm.fabs.nxv4f32( %a) + ret %ret +} + +define @fabs_f32_undef( %a, %b) #0 { +; CHECK-LABEL: fabs_f32_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fabs z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.fabs.nxv4f32( undef, %pg, %b) + ret %ret +} + +define @fabs_f32_active( %a, %b) #0 { +; CHECK-LABEL: fabs_f32_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fabs z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.fabs.nxv4f32( %a, %pg, %b) + ret %ret +} + +define @fabs_f32_not_active( %a, %b) #0 { +; CHECK-LABEL: fabs_f32_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fabs z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.fabs.nxv4f32( %a, %pg.from, %b) + ret %ret +} + +define @fabs_f64( %a, %b) #0 { +; CHECK-LABEL: fabs_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fabs z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.fabs.nxv2f64( %b) + ret %ret +} + +define @fabs_f64_dupreg( %a) #0 { +; CHECK-LABEL: fabs_f64_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fabs z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %ret = tail call @llvm.fabs.nxv2f64( %a) + ret %ret +} + +define @fabs_f64_undef( %a, %b) #0 { +; CHECK-LABEL: fabs_f64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fabs z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.fabs.nxv2f64( undef, %pg, %b) + ret %ret +} + +define @fabs_f64_active( %a, %b) #0 { +; CHECK-LABEL: fabs_f64_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fabs z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.fabs.nxv2f64( %a, %pg, %b) + ret %ret +} + +define @fabs_f64_not_active( %a, %b, %pg) #0 { +; CHECK-LABEL: fabs_f64_not_active: +; CHECK: // %bb.0: +; CHECK: fabs z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.aarch64.sve.fabs.nxv2f64( %a, %pg, %b) + ret %ret +} + +; +; FNEG +; + +define @fneg_f16( %a, %b) #0 { +; CHECK-LABEL: fneg_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fneg z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %ret = fneg %b + ret %ret +} + +define @fneg_f16_dupreg( %a) #0 { +; CHECK-LABEL: fneg_f16_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fneg z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %ret = fneg %a + ret %ret +} + +define @fneg_f16_undef( %a, %b) #0 { +; CHECK-LABEL: fneg_f16_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fneg z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.fneg.nxv8f16( undef, %pg, %b) + ret %ret +} + +define @fneg_f16_active( %a, %b) #0 { +; 
CHECK-LABEL: fneg_f16_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fneg z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.fneg.nxv8f16( %a, %pg, %b) + ret %ret +} + +define @fneg_f16_not_active( %a, %b) #0 { +; CHECK-LABEL: fneg_f16_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fneg z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.fneg.nxv8f16( %a, %pg.from, %b) + ret %ret +} + +define @fneg_f32( %a, %b) #0 { +; CHECK-LABEL: fneg_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fneg z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %ret = fneg %b + ret %ret +} + +define @fneg_f32_dupreg( %a) #0 { +; CHECK-LABEL: fneg_f32_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fneg z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %ret = fneg %a + ret %ret +} + +define @fneg_f32_undef( %a, %b) #0 { +; CHECK-LABEL: fneg_f32_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fneg z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.fneg.nxv4f32( undef, %pg, %b) + ret %ret +} + +define @fneg_f32_active( %a, %b) #0 { +; CHECK-LABEL: fneg_f32_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fneg z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.fneg.nxv4f32( %a, %pg, %b) + ret %ret +} + +define @fneg_f32_not_active( %a, %b) #0 { +; CHECK-LABEL: fneg_f32_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fneg z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.fneg.nxv4f32( %a, %pg.from, %b) + ret %ret +} + +define @fneg_f64( %a, %b) #0 { +; CHECK-LABEL: fneg_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fneg z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = fneg %b + ret %ret +} + +define @fneg_f64_dupreg( %a) #0 { +; CHECK-LABEL: fneg_f64_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fneg z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %ret = fneg %a + ret %ret +} + +define @fneg_f64_undef( %a, %b) #0 { +; CHECK-LABEL: fneg_f64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fneg z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.fneg.nxv2f64( undef, %pg, %b) + ret %ret +} + +define @fneg_f64_active( %a, %b) #0 { +; CHECK-LABEL: fneg_f64_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fneg z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.fneg.nxv2f64( %a, %pg, %b) + ret %ret +} + +define @fneg_f64_not_active( %a, %b, %pg) #0 { +; CHECK-LABEL: fneg_f64_not_active: +; CHECK: 
// %bb.0: +; CHECK: fneg z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.aarch64.sve.fneg.nxv2f64( %a, %pg, %b) + ret %ret +} + +; +; SXTB +; + +define @sxtb_i16( %a, %b) #0 { +; CHECK-LABEL: sxtb_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sxtb z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %ret = sext %b to + ret %ret +} + +define @sxtb_i16_dupreg( %a) #0 { +; CHECK-LABEL: sxtb_i16_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: sxtb z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %ret = sext %a to + ret %ret +} + +define @sxtb_i16_undef( %a, %b) #0 { +; CHECK-LABEL: sxtb_i16_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sxtb z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sxtb.nxv8i16( undef, %pg, %b) + ret %ret +} + +define @sxtb_i16_active( %a, %b) #0 { +; CHECK-LABEL: sxtb_i16_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sxtb z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sxtb.nxv8i16( %a, %pg, %b) + ret %ret +} + +define @sxtb_i16_not_active( %a, %b) #0 { +; CHECK-LABEL: sxtb_i16_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxtb z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.sxtb.nxv8i16( %a, %pg.from, %b) + ret %ret +} + +define @sxtb_i32( %a, %b) #0 { +; CHECK-LABEL: sxtb_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sxtb z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %ret = sext %b to + ret %ret +} + +define @sxtb_i32_dupreg( %a) #0 { +; CHECK-LABEL: sxtb_i32_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sxtb z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %ret = sext %a to + ret %ret +} + +define @sxtb_i32_undef( %a, %b) #0 { +; CHECK-LABEL: sxtb_i32_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sxtb z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sxtb.nxv4i32( undef, %pg, %b) + ret %ret +} + +define @sxtb_i32_active( %a, %b) #0 { +; CHECK-LABEL: sxtb_i32_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sxtb z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sxtb.nxv4i32( %a, %pg, %b) + ret %ret +} + +define @sxtb_i32_not_active( %a, %b) #0 { +; CHECK-LABEL: sxtb_i32_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxtb z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.sxtb.nxv4i32( %a, %pg.from, %b) + ret %ret +} + +define @sxtb_i64( %a, %b) #0 { +; CHECK-LABEL: sxtb_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sxtb z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = sext %b to + ret %ret +} + 
+define @sxtb_i64_dupreg( %a) #0 { +; CHECK-LABEL: sxtb_i64_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxtb z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %ret = sext %a to + ret %ret +} + +define @sxtb_i64_undef( %a, %b) #0 { +; CHECK-LABEL: sxtb_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sxtb z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sxtb.nxv2i64( undef, %pg, %b) + ret %ret +} + +define @sxtb_i64_active( %a, %b) #0 { +; CHECK-LABEL: sxtb_i64_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sxtb z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sxtb.nxv2i64( %a, %pg, %b) + ret %ret +} + +define @sxtb_i64_not_active( %a, %b, %pg) #0 { +; CHECK-LABEL: sxtb_i64_not_active: +; CHECK: // %bb.0: +; CHECK: sxtb z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.aarch64.sve.sxtb.nxv2i64( %a, %pg, %b) + ret %ret +} + +; +; SXTH +; + +define @sxth_i32( %a, %b) #0 { +; CHECK-LABEL: sxth_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sxth z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %ret = sext %b to + ret %ret +} + +define @sxth_i32_dupreg( %a) #0 { +; CHECK-LABEL: sxth_i32_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sxth z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %ret = sext %a to + ret %ret +} + +define @sxth_i32_undef( %a, %b) #0 { +; CHECK-LABEL: sxth_i32_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sxth z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sxth.nxv4i32( undef, %pg, %b) + ret %ret +} + +define @sxth_i32_active( %a, %b) #0 { +; CHECK-LABEL: sxth_i32_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sxth z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sxth.nxv4i32( %a, %pg, %b) + ret %ret +} + +define @sxth_i32_not_active( %a, %b) #0 { +; CHECK-LABEL: sxth_i32_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxth z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.sxth.nxv4i32( %a, %pg.from, %b) + ret %ret +} + +define @sxth_i64( %a, %b) #0 { +; CHECK-LABEL: sxth_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sxth z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = sext %b to + ret %ret +} + +define @sxth_i64_dupreg( %a) #0 { +; CHECK-LABEL: sxth_i64_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxth z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %ret = sext %a to + ret %ret +} + +define @sxth_i64_undef( %a, %b) #0 { +; CHECK-LABEL: sxth_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sxth z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sxth.nxv2i64( undef, %pg, %b) + ret %ret +} + +define @sxth_i64_active( %a, %b) #0 { +; 
CHECK-LABEL: sxth_i64_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sxth z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sxth.nxv2i64( %a, %pg, %b) + ret %ret +} + +define @sxth_i64_not_active( %a, %b, %pg) #0 { +; CHECK-LABEL: sxth_i64_not_active: +; CHECK: // %bb.0: +; CHECK: sxth z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.aarch64.sve.sxth.nxv2i64( %a, %pg, %b) + ret %ret +} + +; +; SXTW +; + +define @sxtw_i64( %a, %b) #0 { +; CHECK-LABEL: sxtw_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sxtw z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = sext %b to + ret %ret +} + +define @sxtw_i64_dupreg( %a) #0 { +; CHECK-LABEL: sxtw_i64_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxtw z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %ret = sext %a to + ret %ret +} + +define @sxtw_i64_undef( %a, %b) #0 { +; CHECK-LABEL: sxtw_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sxtw z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sxtw.nxv2i64( undef, %pg, %b) + ret %ret +} + +define @sxtw_i64_active( %a, %b) #0 { +; CHECK-LABEL: sxtw_i64_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sxtw z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sxtw.nxv2i64( %a, %pg, %b) + ret %ret +} + +define @sxtw_i64_not_active( %a, %b, %pg) #0 { +; CHECK-LABEL: sxtw_i64_not_active: +; CHECK: // %bb.0: +; CHECK: sxtw z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.aarch64.sve.sxtw.nxv2i64( %a, %pg, %b) + ret %ret +} + +; +; UXTB +; + +define @uxtb_i16( %a, %b) #0 { +; CHECK-LABEL: uxtb_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uxtb z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxtb.nxv8i16( undef, %pg, %b) + ret %ret +} + +define @uxtb_i16_dupreg( %a) #0 { +; CHECK-LABEL: uxtb_i16_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: uxtb z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxtb.nxv8i16( undef, %pg, %a) + ret %ret +} + +define @uxtb_i16_undef( %a, %b) #0 { +; CHECK-LABEL: uxtb_i16_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uxtb z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxtb.nxv8i16( undef, %pg, %b) + ret %ret +} + +define @uxtb_i16_active( %a, %b) #0 { +; CHECK-LABEL: uxtb_i16_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uxtb z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxtb.nxv8i16( %a, %pg, %b) + ret %ret +} + +define @uxtb_i16_not_active( %a, %b) #0 { +; CHECK-LABEL: uxtb_i16_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uxtb z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + 
%pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.uxtb.nxv8i16( %a, %pg.from, %b) + ret %ret +} + +define @uxtb_i32( %a, %b) #0 { +; CHECK-LABEL: uxtb_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uxtb z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxtb.nxv4i32( undef, %pg, %b) + ret %ret +} + +define @uxtb_i32_dupreg( %a) #0 { +; CHECK-LABEL: uxtb_i32_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: uxtb z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxtb.nxv4i32( undef, %pg, %a) + ret %ret +} + +define @uxtb_i32_undef( %a, %b) #0 { +; CHECK-LABEL: uxtb_i32_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uxtb z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxtb.nxv4i32( undef, %pg, %b) + ret %ret +} + +define @uxtb_i32_active( %a, %b) #0 { +; CHECK-LABEL: uxtb_i32_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uxtb z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxtb.nxv4i32( %a, %pg, %b) + ret %ret +} + +define @uxtb_i32_not_active( %a, %b) #0 { +; CHECK-LABEL: uxtb_i32_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uxtb z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.uxtb.nxv4i32( %a, %pg.from, %b) + ret %ret +} + +define @uxtb_i64( %a, %b) #0 { +; CHECK-LABEL: uxtb_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uxtb z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxtb.nxv2i64( undef, %pg, %b) + ret %ret +} + +define @uxtb_i64_dupreg( %a) #0 { +; CHECK-LABEL: uxtb_i64_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uxtb z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxtb.nxv2i64( undef, %pg, %a) + ret %ret +} + +define @uxtb_i64_undef( %a, %b) #0 { +; CHECK-LABEL: uxtb_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uxtb z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxtb.nxv2i64( undef, %pg, %b) + ret %ret +} + +define @uxtb_i64_active( %a, %b) #0 { +; CHECK-LABEL: uxtb_i64_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uxtb z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxtb.nxv2i64( %a, %pg, %b) + ret %ret +} + +define @uxtb_i64_not_active( %a, %b, %pg) #0 { +; CHECK-LABEL: uxtb_i64_not_active: +; CHECK: // %bb.0: +; CHECK: uxtb z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.aarch64.sve.uxtb.nxv2i64( %a, %pg, %b) + ret %ret +} + +; +; UXTH +; + +define @uxth_i32( %a, 
%b) #0 { +; CHECK-LABEL: uxth_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uxth z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxth.nxv4i32( undef, %pg, %b) + ret %ret +} + +define @uxth_i32_dupreg( %a) #0 { +; CHECK-LABEL: uxth_i32_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: uxth z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxth.nxv4i32( undef, %pg, %a) + ret %ret +} + +define @uxth_i32_undef( %a, %b) #0 { +; CHECK-LABEL: uxth_i32_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uxth z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxth.nxv4i32( undef, %pg, %b) + ret %ret +} + +define @uxth_i32_active( %a, %b) #0 { +; CHECK-LABEL: uxth_i32_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uxth z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxth.nxv4i32( %a, %pg, %b) + ret %ret +} + +define @uxth_i32_not_active( %a, %b) #0 { +; CHECK-LABEL: uxth_i32_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uxth z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.uxth.nxv4i32( %a, %pg.from, %b) + ret %ret +} + +define @uxth_i64( %a, %b) #0 { +; CHECK-LABEL: uxth_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uxth z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxth.nxv2i64( undef, %pg, %b) + ret %ret +} + +define @uxth_i64_dupreg( %a) #0 { +; CHECK-LABEL: uxth_i64_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uxth z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxth.nxv2i64( undef, %pg, %a) + ret %ret +} + +define @uxth_i64_undef( %a, %b) #0 { +; CHECK-LABEL: uxth_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uxth z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxth.nxv2i64( undef, %pg, %b) + ret %ret +} + +define @uxth_i64_active( %a, %b) #0 { +; CHECK-LABEL: uxth_i64_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uxth z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxth.nxv2i64( %a, %pg, %b) + ret %ret +} + +define @uxth_i64_not_active( %a, %b, %pg) #0 { +; CHECK-LABEL: uxth_i64_not_active: +; CHECK: // %bb.0: +; CHECK: uxth z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.aarch64.sve.uxth.nxv2i64( %a, %pg, %b) + ret %ret +} + +; +; UXTW +; + +define @uxtw_i64( %a, %b) #0 { +; CHECK-LABEL: uxtw_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uxtw z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call 
@llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxtw.nxv2i64( undef, %pg, %b) + ret %ret +} + +define @uxtw_i64_dupreg( %a) #0 { +; CHECK-LABEL: uxtw_i64_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uxtw z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxtw.nxv2i64( undef, %pg, %a) + ret %ret +} + +define @uxtw_i64_undef( %a, %b) #0 { +; CHECK-LABEL: uxtw_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uxtw z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxtw.nxv2i64( undef, %pg, %b) + ret %ret +} + +define @uxtw_i64_active( %a, %b) #0 { +; CHECK-LABEL: uxtw_i64_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uxtw z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.uxtw.nxv2i64( %a, %pg, %b) + ret %ret +} + +define @uxtw_i64_not_active( %a, %b, %pg) #0 { +; CHECK-LABEL: uxtw_i64_not_active: +; CHECK: // %bb.0: +; CHECK: uxtw z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.aarch64.sve.uxtw.nxv2i64( %a, %pg, %b) + ret %ret +} + +declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) +declare @llvm.aarch64.sve.ptrue.nxv8i1(i32) +declare @llvm.aarch64.sve.ptrue.nxv4i1(i32) +declare @llvm.aarch64.sve.ptrue.nxv2i1(i32) + +declare @llvm.aarch64.sve.convert.to.svbool.nxv8i1() +declare @llvm.aarch64.sve.convert.to.svbool.nxv4i1() +declare @llvm.aarch64.sve.convert.to.svbool.nxv2i1() + +declare @llvm.aarch64.sve.convert.from.svbool.nxv8i1() +declare @llvm.aarch64.sve.convert.from.svbool.nxv4i1() +declare @llvm.aarch64.sve.convert.from.svbool.nxv2i1() + +declare @llvm.aarch64.sve.abs.nxv16i8(, , ) +declare @llvm.aarch64.sve.abs.nxv8i16(, , ) +declare @llvm.aarch64.sve.abs.nxv4i32(, , ) +declare @llvm.aarch64.sve.abs.nxv2i64(, , ) + +declare @llvm.abs.nxv16i8(, i1) +declare @llvm.abs.nxv8i16(, i1) +declare @llvm.abs.nxv4i32(, i1) +declare @llvm.abs.nxv2i64(, i1) + +declare @llvm.aarch64.sve.neg.nxv16i8(, , ) +declare @llvm.aarch64.sve.neg.nxv8i16(, , ) +declare @llvm.aarch64.sve.neg.nxv4i32(, , ) +declare @llvm.aarch64.sve.neg.nxv2i64(, , ) + +declare @llvm.aarch64.sve.cls.nxv16i8(, , ) +declare @llvm.aarch64.sve.cls.nxv8i16(, , ) +declare @llvm.aarch64.sve.cls.nxv4i32(, , ) +declare @llvm.aarch64.sve.cls.nxv2i64(, , ) + +declare @llvm.aarch64.sve.clz.nxv16i8(, , ) +declare @llvm.aarch64.sve.clz.nxv8i16(, , ) +declare @llvm.aarch64.sve.clz.nxv4i32(, , ) +declare @llvm.aarch64.sve.clz.nxv2i64(, , ) + +declare @llvm.ctlz.nxv16i8(, i1) +declare @llvm.ctlz.nxv8i16(, i1) +declare @llvm.ctlz.nxv4i32(, i1) +declare @llvm.ctlz.nxv2i64(, i1) + +declare @llvm.aarch64.sve.cnot.nxv16i8(, , ) +declare @llvm.aarch64.sve.cnot.nxv8i16(, , ) +declare @llvm.aarch64.sve.cnot.nxv4i32(, , ) +declare @llvm.aarch64.sve.cnot.nxv2i64(, , ) + +declare @llvm.aarch64.sve.not.nxv16i8(, , ) +declare @llvm.aarch64.sve.not.nxv8i16(, , ) +declare @llvm.aarch64.sve.not.nxv4i32(, , ) +declare @llvm.aarch64.sve.not.nxv2i64(, , ) + +declare @llvm.aarch64.sve.cnt.nxv16i8(, , ) +declare @llvm.aarch64.sve.cnt.nxv8i16(, , ) +declare @llvm.aarch64.sve.cnt.nxv4i32(, , ) +declare @llvm.aarch64.sve.cnt.nxv2i64(, , ) + +declare @llvm.ctpop.nxv16i8() +declare @llvm.ctpop.nxv8i16() +declare @llvm.ctpop.nxv4i32() +declare @llvm.ctpop.nxv2i64() + +declare 
<vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sxtb.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sxtb.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sxth.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sxth.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+attributes #0 = { nounwind "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/AArch64/sve2-unary-movprfx.ll b/llvm/test/CodeGen/AArch64/sve2-unary-movprfx.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-unary-movprfx.ll
@@ -0,0 +1,518 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; SQABS
+;
+
+define <vscale x 16 x i8> @sqabs_i8_dupreg(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: sqabs_i8_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: sqabs z0.b, p0/m, z0.b
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
+ ret <vscale x 16 x i8> %ret
+}
+
+define <vscale x 16 x i8> @sqabs_i8_undef(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: sqabs_i8_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqabs z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %ret
+}
+
+define <vscale x 16 x i8> @sqabs_i8_active(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: sqabs_i8_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqabs z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %ret
+}
+
+define <vscale x 16 x i8> @sqabs_i8_not_active(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: sqabs_i8_not_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: sqabs z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+ %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg.to, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %ret
+}
+
+define <vscale x 8 x i16> @sqabs_i16_dupreg(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: sqabs_i16_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: sqabs z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @sqabs_i16_undef(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: sqabs_i16_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqabs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @sqabs_i16_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: sqabs_i16_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sqabs z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqabs.nxv8i16( %a, %pg, %b) + ret %ret +} + +define @sqabs_i16_not_active( %a, %b) #0 { +; CHECK-LABEL: sqabs_i16_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sqabs z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.sqabs.nxv8i16( %a, %pg.from, %b) + ret %ret +} + +define @sqabs_i32_dupreg( %a) #0 { +; CHECK-LABEL: sqabs_i32_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sqabs z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqabs.nxv4i32( undef, %pg, %a) + ret %ret +} + +define @sqabs_i32_undef( %a, %b) #0 { +; CHECK-LABEL: sqabs_i32_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sqabs z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqabs.nxv4i32( undef, %pg, %b) + ret %ret +} + +define @sqabs_i32_active( %a, %b) #0 { +; CHECK-LABEL: sqabs_i32_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sqabs z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqabs.nxv4i32( %a, %pg, %b) + ret %ret +} + +define @sqabs_i32_not_active( %a, %b) #0 { +; CHECK-LABEL: sqabs_i32_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sqabs z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.sqabs.nxv4i32( %a, %pg.from, %b) + ret %ret +} + +define @sqabs_i64_dupreg( %a) #0 { +; CHECK-LABEL: sqabs_i64_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sqabs z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqabs.nxv2i64( undef, %pg, %a) + ret %ret +} + +define @sqabs_i64_undef( %a, %b) #0 { +; CHECK-LABEL: sqabs_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sqabs z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqabs.nxv2i64( undef, %pg, %b) + ret %ret +} + +define @sqabs_i64_active( %a, %b) #0 { +; CHECK-LABEL: sqabs_i64_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sqabs z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqabs.nxv2i64( %a, %pg, %b) + ret %ret +} + +define @sqabs_i64_not_active( %a, %b, %pg) #0 { +; CHECK-LABEL: sqabs_i64_not_active: +; CHECK: // %bb.0: +; CHECK: sqabs z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.aarch64.sve.sqabs.nxv2i64( %a, %pg, %b) + ret %ret +} + +; +; SQNEG +; + +define @sqneg_i8_dupreg( %a) #0 { +; CHECK-LABEL: sqneg_i8_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue 
p0.b +; CHECK-NEXT: sqneg z0.b, p0/m, z0.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqneg.nxv16i8( undef, %pg, %a) + ret %ret +} + +define @sqneg_i8_undef( %a, %b) #0 { +; CHECK-LABEL: sqneg_i8_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sqneg z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqneg.nxv16i8( undef, %pg, %b) + ret %ret +} + +define @sqneg_i8_active( %a, %b) #0 { +; CHECK-LABEL: sqneg_i8_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sqneg z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqneg.nxv16i8( %a, %pg, %b) + ret %ret +} + +define @sqneg_i8_not_active( %a, %b) #0 { +; CHECK-LABEL: sqneg_i8_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sqneg z0.b, p0/m, z1.b +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %ret = tail call @llvm.aarch64.sve.sqneg.nxv16i8( %a, %pg.to, %b) + ret %ret +} + +define @sqneg_i16_dupreg( %a) #0 { +; CHECK-LABEL: sqneg_i16_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: sqneg z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqneg.nxv8i16( undef, %pg, %a) + ret %ret +} + +define @sqneg_i16_undef( %a, %b) #0 { +; CHECK-LABEL: sqneg_i16_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sqneg z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqneg.nxv8i16( undef, %pg, %b) + ret %ret +} + +define @sqneg_i16_active( %a, %b) #0 { +; CHECK-LABEL: sqneg_i16_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sqneg z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqneg.nxv8i16( %a, %pg, %b) + ret %ret +} + +define @sqneg_i16_not_active( %a, %b) #0 { +; CHECK-LABEL: sqneg_i16_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sqneg z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.sqneg.nxv8i16( %a, %pg.from, %b) + ret %ret +} + +define @sqneg_i32_dupreg( %a) #0 { +; CHECK-LABEL: sqneg_i32_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sqneg z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqneg.nxv4i32( undef, %pg, %a) + ret %ret +} + +define @sqneg_i32_undef( %a, %b) #0 { +; CHECK-LABEL: sqneg_i32_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sqneg z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqneg.nxv4i32( undef, %pg, %b) + ret %ret +} + +define @sqneg_i32_active( %a, %b) #0 { +; CHECK-LABEL: sqneg_i32_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; 
CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sqneg z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqneg.nxv4i32( %a, %pg, %b) + ret %ret +} + +define @sqneg_i32_not_active( %a, %b) #0 { +; CHECK-LABEL: sqneg_i32_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sqneg z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.sqneg.nxv4i32( %a, %pg.from, %b) + ret %ret +} + +define @sqneg_i64_dupreg( %a) #0 { +; CHECK-LABEL: sqneg_i64_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sqneg z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqneg.nxv2i64( undef, %pg, %a) + ret %ret +} + +define @sqneg_i64_undef( %a, %b) #0 { +; CHECK-LABEL: sqneg_i64_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sqneg z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqneg.nxv2i64( undef, %pg, %b) + ret %ret +} + +define @sqneg_i64_active( %a, %b) #0 { +; CHECK-LABEL: sqneg_i64_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: sqneg z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %ret = tail call @llvm.aarch64.sve.sqneg.nxv2i64( %a, %pg, %b) + ret %ret +} + +define @sqneg_i64_not_active( %a, %b, %pg) #0 { +; CHECK-LABEL: sqneg_i64_not_active: +; CHECK: // %bb.0: +; CHECK: sqneg z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %ret = tail call @llvm.aarch64.sve.sqneg.nxv2i64( %a, %pg, %b) + ret %ret +} + +; +; URECPE +; + +define @urecpe_i32_dupreg( %a) #0 { +; CHECK-LABEL: urecpe_i32_dupreg: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: urecpe z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.urecpe.nxv4i32( undef, %pg, %a) + ret %ret +} + +define @urecpe_i32_undef( %a, %b) #0 { +; CHECK-LABEL: urecpe_i32_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: urecpe z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.urecpe.nxv4i32( undef, %pg, %b) + ret %ret +} + +define @urecpe_i32_active( %a, %b) #0 { +; CHECK-LABEL: urecpe_i32_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: urecpe z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %ret = tail call @llvm.aarch64.sve.urecpe.nxv4i32( %a, %pg, %b) + ret %ret +} + +define @urecpe_i32_not_active( %a, %b) #0 { +; CHECK-LABEL: urecpe_i32_not_active: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: urecpe z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %pg.to = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %pg) + %pg.from = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg.to) + %ret = tail call @llvm.aarch64.sve.urecpe.nxv4i32( %a, %pg.from, %b) + ret %ret +} + +; +; URSQRTE +; + +define @ursqrte_i32_dupreg( %a) #0 { +; CHECK-LABEL: ursqrte_i32_dupreg: +; 
CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ursqrte z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @ursqrte_i32_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: ursqrte_i32_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: ursqrte z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @ursqrte_i32_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: ursqrte_i32_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: ursqrte z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @ursqrte_i32_not_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: ursqrte_i32_not_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ursqrte z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+ %pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqneg.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqneg.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqneg.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqneg.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+
+attributes #0 = { nounwind "target-features"="+sve2" }