diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1327,6 +1327,202 @@
   return instCombineSVEVectorBinOp(IC, II);
 }
+static std::optional<Instruction *>
+instCombineSVEAllActive2VA(InstCombiner &IC, IntrinsicInst &II) {
+  auto *OpPredicate = II.getOperand(0);
+  auto *OpA = II.getOperand(1);
+  auto *OpB = II.getOperand(2);
+  if (!match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+                              m_ConstantInt<AArch64SVEPredPattern::all>())))
+    return std::nullopt;
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+  switch (II.getIntrinsicID()) {
+  default:
+    return std::nullopt;
+  case Intrinsic::aarch64_sve_fabd: {
+    auto FABD_U = Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_fabd_u, {II.getType()}, {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, FABD_U);
+  }
+  case Intrinsic::aarch64_sve_fdiv: {
+    auto FDIV_U = Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_fdiv_u, {II.getType()}, {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, FDIV_U);
+  }
+  case Intrinsic::aarch64_sve_fmax: {
+    auto FMAX_U = Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_fmax_u, {II.getType()}, {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, FMAX_U);
+  }
+  case Intrinsic::aarch64_sve_fmaxnm: {
+    auto FMAXNM_U =
+        Builder.CreateIntrinsic(Intrinsic::aarch64_sve_fmaxnm_u, {II.getType()},
+                                {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, FMAXNM_U);
+  }
+  case Intrinsic::aarch64_sve_fmin: {
+    auto FMIN_U = Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_fmin_u, {II.getType()}, {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, FMIN_U);
+  }
+  case Intrinsic::aarch64_sve_fminnm: {
+    auto FMINNM_U =
+        Builder.CreateIntrinsic(Intrinsic::aarch64_sve_fminnm_u, {II.getType()},
+                                {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, FMINNM_U);
+  }
+  case Intrinsic::aarch64_sve_fmulx: {
+    auto FMULX_U =
+        Builder.CreateIntrinsic(Intrinsic::aarch64_sve_fmulx_u, {II.getType()},
+                                {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, FMULX_U);
+  }
+  case Intrinsic::aarch64_sve_smulh: {
+    auto SMULH_U =
+        Builder.CreateIntrinsic(Intrinsic::aarch64_sve_smulh_u, {II.getType()},
+                                {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, SMULH_U);
+  }
+  case Intrinsic::aarch64_sve_umulh: {
+    auto UMULH_U =
+        Builder.CreateIntrinsic(Intrinsic::aarch64_sve_umulh_u, {II.getType()},
+                                {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, UMULH_U);
+  }
+  case Intrinsic::aarch64_sve_smin: {
+    auto SMIN_U = Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_smin_u, {II.getType()}, {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, SMIN_U);
+  }
+  case Intrinsic::aarch64_sve_umin: {
+    auto UMIN_U = Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_umin_u, {II.getType()}, {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, UMIN_U);
+  }
+  case Intrinsic::aarch64_sve_smax: {
+    auto SMAX_U = Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_smax_u, {II.getType()}, {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, SMAX_U);
+  }
+  case Intrinsic::aarch64_sve_umax: {
+    auto UMAX_U = Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_umax_u, {II.getType()}, {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, UMAX_U);
+  }
+  case Intrinsic::aarch64_sve_sabd: {
+    auto SABD_U = Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_sabd_u, {II.getType()}, {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, SABD_U);
+  }
+  case Intrinsic::aarch64_sve_uabd: {
+    auto UABD_U = Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_uabd_u, {II.getType()}, {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, UABD_U);
+  }
+  case Intrinsic::aarch64_sve_asr: {
+    auto ASR_U = Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_asr_u, {II.getType()}, {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, ASR_U);
+  }
+  case Intrinsic::aarch64_sve_lsl: {
+    auto LSL_U = Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_lsl_u, {II.getType()}, {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, LSL_U);
+  }
+  case Intrinsic::aarch64_sve_lsr: {
+    auto LSR_U = Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_lsr_u, {II.getType()}, {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, LSR_U);
+  }
+  case Intrinsic::aarch64_sve_and: {
+    auto AND_U = Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_and_u, {II.getType()}, {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, AND_U);
+  }
+  case Intrinsic::aarch64_sve_bic: {
+    auto BIC_U = Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_bic_u, {II.getType()}, {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, BIC_U);
+  }
+  case Intrinsic::aarch64_sve_eor: {
+    auto EOR_U = Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_eor_u, {II.getType()}, {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, EOR_U);
+  }
+  case Intrinsic::aarch64_sve_orr: {
+    auto ORR_U = Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_orr_u, {II.getType()}, {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, ORR_U);
+  }
+  case Intrinsic::aarch64_sve_sqsub: {
+    auto SQSUB_U =
+        Builder.CreateIntrinsic(Intrinsic::aarch64_sve_sqsub_u, {II.getType()},
+                                {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, SQSUB_U);
+  }
+  case Intrinsic::aarch64_sve_uqsub: {
+    auto UQSUB_U =
+        Builder.CreateIntrinsic(Intrinsic::aarch64_sve_uqsub_u, {II.getType()},
+                                {OpPredicate, OpA, OpB});
+    return IC.replaceInstUsesWith(II, UQSUB_U);
+  }
+  }
+}
+
+static std::optional<Instruction *>
+instCombineSVEAllActive3VA(InstCombiner &IC, IntrinsicInst &II) {
+  auto *OpPredicate = II.getOperand(0);
+  auto *OpA = II.getOperand(1);
+  auto *OpB = II.getOperand(2);
+  auto *OpC = II.getOperand(3);
+  if (!match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+                              m_ConstantInt<AArch64SVEPredPattern::all>())))
+    return std::nullopt;
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+  switch (II.getIntrinsicID()) {
+  default:
+    return std::nullopt;
+  case Intrinsic::aarch64_sve_fmla: {
+    auto FMLA_U =
+        Builder.CreateIntrinsic(Intrinsic::aarch64_sve_fmla_u, {II.getType()},
+                                {OpPredicate, OpB, OpC, OpA});
+    return IC.replaceInstUsesWith(II, FMLA_U);
+  }
+  case Intrinsic::aarch64_sve_fmls: {
+    auto FMLS_U =
+        Builder.CreateIntrinsic(Intrinsic::aarch64_sve_fmls_u, {II.getType()},
+                                {OpPredicate, OpB, OpC, OpA});
+    return IC.replaceInstUsesWith(II, FMLS_U);
+  }
+  case Intrinsic::aarch64_sve_fnmla: {
+    auto FNMLA_U =
+        Builder.CreateIntrinsic(Intrinsic::aarch64_sve_fnmla_u, {II.getType()},
+                                {OpPredicate, OpB, OpC, OpA});
+    return IC.replaceInstUsesWith(II, FNMLA_U);
+  }
+  case Intrinsic::aarch64_sve_fnmls: {
+    auto FNMLS_U =
+        Builder.CreateIntrinsic(Intrinsic::aarch64_sve_fnmls_u, {II.getType()},
+                                {OpPredicate, OpB, OpC, OpA});
+    return IC.replaceInstUsesWith(II, FNMLS_U);
+  }
+  case Intrinsic::aarch64_sve_mla: {
+    auto MLA_U =
+        Builder.CreateIntrinsic(Intrinsic::aarch64_sve_mla_u, {II.getType()},
+                                {OpPredicate, OpB, OpC, OpA});
+    return IC.replaceInstUsesWith(II, MLA_U);
+  }
+  case Intrinsic::aarch64_sve_mls: {
+    auto MLS_U =
+        Builder.CreateIntrinsic(Intrinsic::aarch64_sve_mls_u, {II.getType()},
+                                {OpPredicate, OpB, OpC, OpA});
+    return IC.replaceInstUsesWith(II, MLS_U);
+  }
+  }
+}
+
 static std::optional<Instruction *>
 instCombineSVEVectorMul(InstCombiner &IC, IntrinsicInst &II) {
   auto *OpPredicate = II.getOperand(0);
@@ -1695,6 +1891,38 @@
   case Intrinsic::aarch64_sve_ptest_first:
   case Intrinsic::aarch64_sve_ptest_last:
     return instCombineSVEPTest(IC, II);
+  case Intrinsic::aarch64_sve_fabd:
+  case Intrinsic::aarch64_sve_fdiv:
+  case Intrinsic::aarch64_sve_fmax:
+  case Intrinsic::aarch64_sve_fmaxnm:
+  case Intrinsic::aarch64_sve_fmin:
+  case Intrinsic::aarch64_sve_fminnm:
+  case Intrinsic::aarch64_sve_fmulx:
+  case Intrinsic::aarch64_sve_smulh:
+  case Intrinsic::aarch64_sve_umulh:
+  case Intrinsic::aarch64_sve_smin:
+  case Intrinsic::aarch64_sve_umin:
+  case Intrinsic::aarch64_sve_smax:
+  case Intrinsic::aarch64_sve_umax:
+  case Intrinsic::aarch64_sve_sabd:
+  case Intrinsic::aarch64_sve_uabd:
+  case Intrinsic::aarch64_sve_asr:
+  case Intrinsic::aarch64_sve_lsl:
+  case Intrinsic::aarch64_sve_lsr:
+  case Intrinsic::aarch64_sve_and:
+  case Intrinsic::aarch64_sve_bic:
+  case Intrinsic::aarch64_sve_eor:
+  case Intrinsic::aarch64_sve_orr:
+  case Intrinsic::aarch64_sve_sqsub:
+  case Intrinsic::aarch64_sve_uqsub:
+    return instCombineSVEAllActive2VA(IC, II);
+  case Intrinsic::aarch64_sve_fmla:
+  case Intrinsic::aarch64_sve_fmls:
+  case Intrinsic::aarch64_sve_fnmla:
+  case Intrinsic::aarch64_sve_fnmls:
+  case Intrinsic::aarch64_sve_mla:
+  case Intrinsic::aarch64_sve_mls:
+    return instCombineSVEAllActive3VA(IC, II);
   case Intrinsic::aarch64_sve_mul:
   case Intrinsic::aarch64_sve_fmul:
   case Intrinsic::aarch64_sve_fmul_u:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-m-to-x.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-m-to-x.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-m-to-x.ll
@@ -0,0 +1,1749 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+
+; Replace SVE merging intrinsics with their equivalent undef (_u) variants when they take an all-active predicate.
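+;
+; A minimal sketch of the rewrite (illustrative only; the fully-typed operands here
+; are reconstructed from the intrinsic type suffixes and are not one of the
+; autogenerated checks below):
+;
+;   %p = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)  ; 31 = all-active pattern
+;   %r = call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> %p, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+;
+; becomes
+;
+;   %r = call <vscale x 4 x float> @llvm.aarch64.sve.fmax.u.nxv4f32(<vscale x 4 x i1> %p, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+;
+; With no inactive lanes the merging semantics are unobservable, so the undef (_u)
+; form is equivalent and gives instruction selection more freedom.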
+ +; Float arithmetics + +declare @llvm.aarch64.sve.fabd.nxv8f16(, , ) +define @replace_fabd_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fabd_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fabd.u.nxv8f16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fabd.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fabd.nxv4f32(, , ) +define @replace_fabd_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fabd_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fabd.u.nxv4f32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fabd.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fabd.nxv2f64(, , ) +define @replace_fabd_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fabd_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fabd.u.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fabd.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fabd_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_fabd_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fabd.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fabd.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fdiv.nxv8f16(, , ) +define @replace_fdiv_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fdiv_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fdiv.u.nxv8f16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fdiv.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fdiv.nxv4f32(, , ) +define @replace_fdiv_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fdiv_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fdiv.u.nxv4f32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fdiv.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fdiv.nxv2f64(, , ) +define @replace_fdiv_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fdiv_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: 
[[TMP2:%.*]] = call @llvm.aarch64.sve.fdiv.u.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fdiv.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fdiv_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_fdiv_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fdiv.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmax.nxv8f16(, , ) +define @replace_fmax_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmax_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmax.u.nxv8f16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmax.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmax.nxv4f32(, , ) +define @replace_fmax_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmax_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmax.u.nxv4f32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmax.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmax.nxv2f64(, , ) +define @replace_fmax_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmax_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmax.u.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmax.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fmax_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_fmax_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmax.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fmax.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmaxnm.nxv8f16(, , ) +define @replace_fmaxnm_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmaxnm_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmaxnm.u.nxv8f16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmaxnm.nxv4f32(, , ) +define @replace_fmaxnm_intrinsic_float( %a, %b) #0 { +; 
CHECK-LABEL: define @replace_fmaxnm_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmaxnm.u.nxv4f32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmaxnm.nxv2f64(, , ) +define @replace_fmaxnm_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmaxnm_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmaxnm.u.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fmaxnm_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_fmaxnm_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmin.nxv8f16(, , ) +define @replace_fmin_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmin_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmin.u.nxv8f16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmin.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmin.nxv4f32(, , ) +define @replace_fmin_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmin_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmin.u.nxv4f32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmin.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmin.nxv2f64(, , ) +define @replace_fmin_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmin_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmin.u.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmin.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fmin_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_fmin_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmin.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call
@llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fmin.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fminnm.nxv8f16(, , ) +define @replace_fminnm_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fminnm_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fminnm.u.nxv8f16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fminnm.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fminnm.nxv4f32(, , ) +define @replace_fminnm_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fminnm_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fminnm.u.nxv4f32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fminnm.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fminnm.nxv2f64(, , ) +define @replace_fminnm_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fminnm_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fminnm.u.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fminnm.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fminnm_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_fminnm_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fminnm.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmla.nxv8f16(, , , ) +define @replace_fmla_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmla_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmla.u.nxv8f16( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmla.nxv8f16( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fmla.nxv4f32(, , , ) +define @replace_fmla_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmla_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmla.u.nxv4f32( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmla.nxv4f32( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fmla.nxv2f64(, , , ) +define @replace_fmla_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: 
define @replace_fmla_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmla.u.nxv2f64( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmla.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +define @no_replace_fmla_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @no_replace_fmla_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmla.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fmla.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fmls.nxv8f16(, , , ) +define @replace_fmls_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmls_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmls.u.nxv8f16( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmls.nxv8f16( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fmls.nxv4f32(, , , ) +define @replace_fmls_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmls_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmls.u.nxv4f32( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmls.nxv4f32( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fmls.nxv2f64(, , , ) +define @replace_fmls_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmls_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmls.u.nxv2f64( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmls.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +define @no_replace_fmls_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @no_replace_fmls_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmls.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fmls.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fnmla.nxv8f16(, , , ) +define @replace_fnmla_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmla_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: 
[[TMP2:%.*]] = call @llvm.aarch64.sve.fnmla.u.nxv8f16( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fnmla.nxv8f16( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fnmla.nxv4f32(, , , ) +define @replace_fnmla_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmla_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fnmla.u.nxv4f32( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fnmla.nxv4f32( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fnmla.nxv2f64(, , , ) +define @replace_fnmla_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmla_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fnmla.u.nxv2f64( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fnmla.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +define @no_replace_fnmla_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @no_replace_fnmla_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fnmla.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fnmls.nxv8f16(, , , ) +define @replace_fnmls_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmls_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fnmls.u.nxv8f16( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fnmls.nxv8f16( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fnmls.nxv4f32(, , , ) +define @replace_fnmls_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmls_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fnmls.u.nxv4f32( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fnmls.nxv4f32( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fnmls.nxv2f64(, , , ) +define @replace_fnmls_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmls_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fnmls.u.nxv2f64( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call 
@llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fnmls.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +define @no_replace_fnmls_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @no_replace_fnmls_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fnmls.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +; Integer arithmetics + +declare @llvm.aarch64.sve.mla.nxv16i8(, , , ) +define @replace_mla_intrinsic_i8( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.mla.u.nxv16i8( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mla.nxv16i8( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mla.nxv8i16(, , , ) +define @replace_mla_intrinsic_i16( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.mla.u.nxv8i16( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mla.nxv8i16( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mla.nxv4i32(, , , ) +define @replace_mla_intrinsic_i32( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.mla.u.nxv4i32( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mla.nxv4i32( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mla.nxv2i64(, , , ) +define @replace_mla_intrinsic_i64( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.mla.u.nxv2i64( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mla.nxv2i64( %1, %a, %b, %c) + ret %2 +} + +define @no_replace_mla_intrinsic_i64( %a, %b, %c) #0 { +; CHECK-LABEL: define @no_replace_mla_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mla.nxv2i64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.mla.nxv2i64( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mls.nxv16i8(, , , ) +define @replace_mls_intrinsic_i8( %a, %b, %c) #0 { +; CHECK-LABEL: define 
@replace_mls_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.mls.u.nxv16i8( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mls.nxv16i8( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mls.nxv8i16(, , , ) +define @replace_mls_intrinsic_i16( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.mls.u.nxv8i16( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mls.nxv8i16( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mls.nxv4i32(, , , ) +define @replace_mls_intrinsic_i32( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.mls.u.nxv4i32( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mls.nxv4i32( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mls.nxv2i64(, , , ) +define @replace_mls_intrinsic_i64( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.mls.u.nxv2i64( [[TMP1]], [[B]], [[C]], [[A]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mls.nxv2i64( %1, %a, %b, %c) + ret %2 +} + +define @no_replace_mls_intrinsic_i64( %a, %b, %c) #0 { +; CHECK-LABEL: define @no_replace_mls_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mls.nxv2i64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.mls.nxv2i64( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.smulh.nxv16i8(, , ) +define @replace_smulh_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smulh.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smulh.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smulh.nxv8i16(, , ) +define @replace_smulh_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smulh.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret 
[[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smulh.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smulh.nxv4i32(, , ) +define @replace_smulh_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smulh.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smulh.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smulh.nxv2i64(, , ) +define @replace_smulh_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smulh.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smulh.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_smulh_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_smulh_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.smulh.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.smulh.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umulh.nxv16i8(, , ) +define @replace_umulh_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umulh.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umulh.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umulh.nxv8i16(, , ) +define @replace_umulh_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umulh.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umulh.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umulh.nxv4i32(, , ) +define @replace_umulh_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umulh.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umulh.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umulh.nxv2i64(, , ) +define @replace_umulh_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umulh.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umulh.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_umulh_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_umulh_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.umulh.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.umulh.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smin.nxv16i8(, , ) +define @replace_smin_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smin.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smin.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smin.nxv8i16(, , ) +define @replace_smin_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smin.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smin.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smin.nxv4i32(, , ) +define @replace_smin_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smin.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smin.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smin.nxv2i64(, , ) +define @replace_smin_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smin.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smin.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_smin_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_smin_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.smin.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.smin.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umin.nxv16i8(, , ) +define @replace_umin_intrinsic_i8( %a, %b) #0 { +; 
CHECK-LABEL: define @replace_umin_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umin.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umin.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umin.nxv8i16(, , ) +define @replace_umin_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umin.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umin.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umin.nxv4i32(, , ) +define @replace_umin_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umin.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umin.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umin.nxv2i64(, , ) +define @replace_umin_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umin.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umin.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_umin_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_umin_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.umin.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.umin.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smax.nxv16i8(, , ) +define @replace_smax_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smax.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smax.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smax.nxv8i16(, , ) +define @replace_smax_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smax.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call 
@llvm.aarch64.sve.smax.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smax.nxv4i32(, , ) +define @replace_smax_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smax.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smax.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smax.nxv2i64(, , ) +define @replace_smax_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smax.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smax.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_smax_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_smax_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.smax.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.smax.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umax.nxv16i8(, , ) +define @replace_umax_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umax.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umax.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umax.nxv8i16(, , ) +define @replace_umax_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umax.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umax.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umax.nxv4i32(, , ) +define @replace_umax_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umax.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umax.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umax.nxv2i64(, , ) +define @replace_umax_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call 
@llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_umax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_umax_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_sabd_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_sabd_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_sabd_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_sabd_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_sabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sabd_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_sabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_sabd_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_sabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_sabd_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_uabd_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_uabd_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_uabd_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_uabd_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_uabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_uabd_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_uabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_uabd_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_uabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_uabd_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+; Shifts
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_asr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_asr_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_asr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_asr_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_asr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_asr_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_asr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_asr_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_asr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_asr_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_lsl_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_lsl_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_lsl_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_lsl_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_lsl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsl_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_lsl_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_lsl_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_lsl_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_lsl_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_lsr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_lsr_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_lsr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_lsr_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_lsr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsr_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_lsr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_lsr_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_lsr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_lsr_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+; Logical operations
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_and_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_and_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_and_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_and_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_and_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_and_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_and_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_and_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_and_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_and_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_bic_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_bic_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_bic_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_bic_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_bic_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_bic_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_bic_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_bic_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_bic_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_bic_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_eor_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_eor_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_eor_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_eor_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_eor_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_eor_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_eor_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_eor_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_eor_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_eor_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_orr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_orr_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_orr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_orr_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_orr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_orr_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_orr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_orr_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_orr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_orr_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+; SVE2 - Uniform DSP operations
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_sqsub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_sqsub_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_sqsub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_sqsub_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_sqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_sqsub_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_sqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_sqsub_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_uqsub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_uqsub_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP2]]
+;
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_uqsub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_uqsub_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
+;
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %2
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @no_replace_uqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_uqsub_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+attributes #0 = { "target-features"="+sve" }