diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1327,6 +1327,24 @@ return instCombineSVEVectorBinOp(IC, II); }
+// Replace the merging SVE intrinsic call II with a call to IID (its "_u"
+// counterpart) when operand 0 is a ptrue using the "all" pattern. Returns
+// std::nullopt for any other predicate so the caller leaves II untouched.
+// NOTE(review): the replacement call is created from scratch, so the
+// original call's fast-math flags and tail marker are not carried over.
+static std::optional<Instruction *> instCombineSVEAllActive(InstCombiner &IC,
+                                                            IntrinsicInst &II,
+                                                            Intrinsic::ID IID) {
+  auto *OpPredicate = II.getOperand(0);
+  if (!match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+                              m_ConstantInt<AArch64SVEPredPattern::all>())))
+    return std::nullopt;
+
+  // Forward every operand unchanged, governing predicate included.
+  SmallVector<Value *> Args(II.args());
+  auto *UndefCall = IC.Builder.CreateIntrinsic(IID, {II.getType()}, Args);
+  return IC.replaceInstUsesWith(II, UndefCall);
+}
+
 static std::optional instCombineSVEVectorMul(InstCombiner &IC, IntrinsicInst &II) { auto *OpPredicate = II.getOperand(0); @@ -1695,6 +1713,66 @@ case Intrinsic::aarch64_sve_ptest_first: case Intrinsic::aarch64_sve_ptest_last: return instCombineSVEPTest(IC, II); + case Intrinsic::aarch64_sve_fabd: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_fabd_u); + case Intrinsic::aarch64_sve_fdiv: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_fdiv_u); + case Intrinsic::aarch64_sve_fmax: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_fmax_u); + case Intrinsic::aarch64_sve_fmaxnm: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_fmaxnm_u); + case Intrinsic::aarch64_sve_fmin: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_fmin_u); + case Intrinsic::aarch64_sve_fminnm: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_fminnm_u); + case Intrinsic::aarch64_sve_fmulx: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_fmulx_u); + case Intrinsic::aarch64_sve_smulh: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_smulh_u); + case Intrinsic::aarch64_sve_umulh: + return 
instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_umulh_u); + case Intrinsic::aarch64_sve_smin: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_smin_u); + case Intrinsic::aarch64_sve_umin: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_umin_u); + case Intrinsic::aarch64_sve_smax: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_smax_u); + case Intrinsic::aarch64_sve_umax: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_umax_u); + case Intrinsic::aarch64_sve_sabd: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_sabd_u); + case Intrinsic::aarch64_sve_uabd: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_uabd_u); + case Intrinsic::aarch64_sve_asr: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_asr_u); + case Intrinsic::aarch64_sve_lsl: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_lsl_u); + case Intrinsic::aarch64_sve_lsr: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_lsr_u); + case Intrinsic::aarch64_sve_and: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_and_u); + case Intrinsic::aarch64_sve_bic: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_bic_u); + case Intrinsic::aarch64_sve_eor: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_eor_u); + case Intrinsic::aarch64_sve_orr: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_orr_u); + case Intrinsic::aarch64_sve_sqsub: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_sqsub_u); + case Intrinsic::aarch64_sve_uqsub: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_uqsub_u); + case Intrinsic::aarch64_sve_fmla: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_fmla_u); + case Intrinsic::aarch64_sve_fmls: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_fmls_u); + case Intrinsic::aarch64_sve_fnmla: + return instCombineSVEAllActive(IC, II, 
Intrinsic::aarch64_sve_fnmla_u); + case Intrinsic::aarch64_sve_fnmls: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_fnmls_u); + case Intrinsic::aarch64_sve_mla: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_mla_u); + case Intrinsic::aarch64_sve_mls: + return instCombineSVEAllActive(IC, II, Intrinsic::aarch64_sve_mls_u); case Intrinsic::aarch64_sve_mul: case Intrinsic::aarch64_sve_mul_u: case Intrinsic::aarch64_sve_fmul: diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-m-to-x.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-m-to-x.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-m-to-x.ll @@ -0,0 +1,1749 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) +declare @llvm.aarch64.sve.ptrue.nxv8i1(i32) +declare @llvm.aarch64.sve.ptrue.nxv4i1(i32) +declare @llvm.aarch64.sve.ptrue.nxv2i1(i32) + +; Replace SVE merging intrinsics to their equivalent undef (_u) variants when they take an all active predicate. 
+ +; Float arithmetics + +declare @llvm.aarch64.sve.fabd.nxv8f16(, , ) +define @replace_fabd_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fabd_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fabd.u.nxv8f16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fabd.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fabd.nxv4f32(, , ) +define @replace_fabd_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fabd_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fabd.u.nxv4f32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fabd.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fabd.nxv2f64(, , ) +define @replace_fabd_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fabd_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fabd.u.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fabd.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fabd_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_fabd_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fabd.nxv2f64( [[TMP1]], [[A]], 
[[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fabd.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fdiv.nxv8f16(, , ) +define @replace_fdiv_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fdiv_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fdiv.u.nxv8f16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fdiv.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fdiv.nxv4f32(, , ) +define @replace_fdiv_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fdiv_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fdiv.u.nxv4f32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fdiv.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fdiv.nxv2f64(, , ) +define @replace_fdiv_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fdiv_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fdiv.u.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fdiv.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fdiv_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_fdiv_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail 
call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fdiv.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fdiv.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmax.nxv8f16(, , ) +define @replace_fmax_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmax_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmax.u.nxv8f16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmax.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmax.nxv4f32(, , ) +define @replace_fmax_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmax_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmax.u.nxv4f32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmax.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmax.nxv2f64(, , ) +define @replace_fmax_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmax_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmax.u.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmax.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fmax_intrinsic_double( %a, %b) #0 { +; 
CHECK-LABEL: define @no_replace_fmax_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmax.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fmax.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmaxnm.nxv8f16(, , ) +define @replace_fmaxnm_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmaxnm_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmaxnm.u.nxv8f16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmaxnm.nxv4f32(, , ) +define @replace_fmaxnm_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmaxnm_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmaxnm.u.nxv4f32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmaxnm.nxv2f64(, , ) +define @replace_fmaxnm_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmaxnm_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmaxnm.u.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call 
@llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fmaxnm_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_fmaxnm_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmaxnm.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fmaxnm.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmin.nxv8f16(, , ) +define @replace_fmin_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmin_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmin.u.nxv8f16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmin.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmin.nxv4f32(, , ) +define @replace_fmin_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmin_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmin.u.nxv4f32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmin.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmin.nxv2f64(, , ) +define @replace_fmin_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @replace_fmin_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 
31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmin.u.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmin.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fmin_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_fmin_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmin.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fmin.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fminnm.nxv8f16(, , ) +define @replace_fminnm_intrinsic_half( %a, %b) #0 { +; CHECK-LABEL: define @replace_fminnm_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fminnm.u.nxv8f16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fminnm.nxv8f16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fminnm.nxv4f32(, , ) +define @replace_fminnm_intrinsic_float( %a, %b) #0 { +; CHECK-LABEL: define @replace_fminnm_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fminnm.u.nxv4f32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fminnm.nxv4f32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fminnm.nxv2f64(, , ) +define @replace_fminnm_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: 
define @replace_fminnm_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fminnm.u.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fminnm.nxv2f64( %1, %a, %b) + ret %2 +} + +define @no_replace_fminnm_intrinsic_double( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_fminnm_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fminnm.nxv2f64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fminnm.nxv2f64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.fmla.nxv8f16(, , , ) +define @replace_fmla_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmla_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmla.u.nxv8f16( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmla.nxv8f16( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fmla.nxv4f32(, , , ) +define @replace_fmla_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmla_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmla.u.nxv4f32( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call 
@llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmla.nxv4f32( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fmla.nxv2f64(, , , ) +define @replace_fmla_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmla_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmla.u.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmla.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +define @no_replace_fmla_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @no_replace_fmla_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmla.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fmla.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fmls.nxv8f16(, , , ) +define @replace_fmls_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmls_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmls.u.nxv8f16( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmls.nxv8f16( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fmls.nxv4f32(, , , ) +define @replace_fmls_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmls_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], 
[[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmls.u.nxv4f32( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmls.nxv4f32( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fmls.nxv2f64(, , , ) +define @replace_fmls_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fmls_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fmls.u.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fmls.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +define @no_replace_fmls_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @no_replace_fmls_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmls.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fmls.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fnmla.nxv8f16(, , , ) +define @replace_fnmla_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmla_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fnmla.u.nxv8f16( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast 
@llvm.aarch64.sve.fnmla.nxv8f16( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fnmla.nxv4f32(, , , ) +define @replace_fnmla_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmla_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fnmla.u.nxv4f32( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fnmla.nxv4f32( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fnmla.nxv2f64(, , , ) +define @replace_fnmla_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmla_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fnmla.u.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fnmla.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +define @no_replace_fnmla_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @no_replace_fnmla_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fnmla.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fnmla.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fnmls.nxv8f16(, , , ) +define @replace_fnmls_intrinsic_half( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmls_intrinsic_half +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fnmls.u.nxv8f16( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fnmls.nxv8f16( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fnmls.nxv4f32(, , , ) +define @replace_fnmls_intrinsic_float( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmls_intrinsic_float +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fnmls.u.nxv4f32( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fnmls.nxv4f32( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.fnmls.nxv2f64(, , , ) +define @replace_fnmls_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_fnmls_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.fnmls.u.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call fast @llvm.aarch64.sve.fnmls.nxv2f64( %1, %a, %b, %c) + ret %2 +} + +define @no_replace_fnmls_intrinsic_double( %a, %b, %c) #0 { +; CHECK-LABEL: define @no_replace_fnmls_intrinsic_double +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fnmls.nxv2f64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call fast @llvm.aarch64.sve.fnmls.nxv2f64( 
%1, %a, %b, %c) + ret %2 +} + +; Integer arithmetics + +declare @llvm.aarch64.sve.mla.nxv16i8(, , , ) +define @replace_mla_intrinsic_i8( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.mla.u.nxv16i8( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mla.nxv16i8( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mla.nxv8i16(, , , ) +define @replace_mla_intrinsic_i16( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.mla.u.nxv8i16( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mla.nxv8i16( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mla.nxv4i32(, , , ) +define @replace_mla_intrinsic_i32( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.mla.u.nxv4i32( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mla.nxv4i32( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mla.nxv2i64(, , , ) +define @replace_mla_intrinsic_i64( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mla_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.mla.u.nxv2i64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mla.nxv2i64( %1, %a, %b, %c) + ret %2 +} + +define @no_replace_mla_intrinsic_i64( %a, %b, %c) #0 { +; CHECK-LABEL: define @no_replace_mla_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mla.nxv2i64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.mla.nxv2i64( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mls.nxv16i8(, , , ) +define @replace_mls_intrinsic_i8( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.mls.u.nxv16i8( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mls.nxv16i8( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mls.nxv8i16(, , , ) +define @replace_mls_intrinsic_i16( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.mls.u.nxv8i16( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mls.nxv8i16( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mls.nxv4i32(, , , ) +define 
@replace_mls_intrinsic_i32( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.mls.u.nxv4i32( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mls.nxv4i32( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.mls.nxv2i64(, , , ) +define @replace_mls_intrinsic_i64( %a, %b, %c) #0 { +; CHECK-LABEL: define @replace_mls_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.mls.u.nxv2i64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.mls.nxv2i64( %1, %a, %b, %c) + ret %2 +} + +define @no_replace_mls_intrinsic_i64( %a, %b, %c) #0 { +; CHECK-LABEL: define @no_replace_mls_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.mls.nxv2i64( [[TMP1]], [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.mls.nxv2i64( %1, %a, %b, %c) + ret %2 +} + +declare @llvm.aarch64.sve.smulh.nxv16i8(, , ) +define @replace_smulh_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smulh.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call 
@llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smulh.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smulh.nxv8i16(, , ) +define @replace_smulh_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smulh.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smulh.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smulh.nxv4i32(, , ) +define @replace_smulh_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smulh.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smulh.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smulh.nxv2i64(, , ) +define @replace_smulh_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_smulh_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smulh.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smulh.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_smulh_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_smulh_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = 
tail call @llvm.aarch64.sve.smulh.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.smulh.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umulh.nxv16i8(, , ) +define @replace_umulh_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umulh.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umulh.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umulh.nxv8i16(, , ) +define @replace_umulh_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umulh.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umulh.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umulh.nxv4i32(, , ) +define @replace_umulh_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umulh.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umulh.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umulh.nxv2i64(, , ) +define @replace_umulh_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_umulh_intrinsic_i64 +; 
CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umulh.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umulh.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_umulh_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_umulh_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.umulh.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.umulh.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smin.nxv16i8(, , ) +define @replace_smin_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smin.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smin.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smin.nxv8i16(, , ) +define @replace_smin_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smin.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smin.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smin.nxv4i32(, , ) 
+define @replace_smin_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smin.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smin.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smin.nxv2i64(, , ) +define @replace_smin_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_smin_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smin.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smin.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_smin_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_smin_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.smin.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.smin.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umin.nxv16i8(, , ) +define @replace_umin_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umin.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call 
@llvm.aarch64.sve.umin.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umin.nxv8i16(, , ) +define @replace_umin_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umin.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umin.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umin.nxv4i32(, , ) +define @replace_umin_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umin.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umin.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umin.nxv2i64(, , ) +define @replace_umin_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_umin_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umin.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umin.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_umin_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_umin_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.umin.nxv2i64( [[TMP1]], [[A]], [[B]]) +; 
CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.umin.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smax.nxv16i8(, , ) +define @replace_smax_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smax.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smax.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smax.nxv8i16(, , ) +define @replace_smax_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smax.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smax.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smax.nxv4i32(, , ) +define @replace_smax_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smax.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smax.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.smax.nxv2i64(, , ) +define @replace_smax_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_smax_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.smax.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.smax.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_smax_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_smax_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.smax.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.smax.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umax.nxv16i8(, , ) +define @replace_umax_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umax.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umax.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umax.nxv8i16(, , ) +define @replace_umax_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umax.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umax.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umax.nxv4i32(, , ) +define @replace_umax_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define 
@replace_umax_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umax.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umax.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.umax.nxv2i64(, , ) +define @replace_umax_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_umax_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.umax.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.umax.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_umax_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_umax_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.umax.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.umax.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.sabd.nxv16i8(, , ) +define @replace_sabd_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_sabd_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.sabd.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.sabd.nxv16i8( %1, %a, %b) + ret %2 +} + +declare 
@llvm.aarch64.sve.sabd.nxv8i16(, , ) +define @replace_sabd_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_sabd_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.sabd.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.sabd.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.sabd.nxv4i32(, , ) +define @replace_sabd_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_sabd_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.sabd.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.sabd.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.sabd.nxv2i64(, , ) +define @replace_sabd_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_sabd_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.sabd.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.sabd.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_sabd_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_sabd_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sabd.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call 
@llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.sabd.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.uabd.nxv16i8(, , ) +define @replace_uabd_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_uabd_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.uabd.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.uabd.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.uabd.nxv8i16(, , ) +define @replace_uabd_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_uabd_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.uabd.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.uabd.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.uabd.nxv4i32(, , ) +define @replace_uabd_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_uabd_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.uabd.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.uabd.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.uabd.nxv2i64(, , ) +define @replace_uabd_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_uabd_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; 
CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.uabd.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.uabd.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_uabd_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_uabd_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.uabd.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.uabd.nxv2i64( %1, %a, %b) + ret %2 +} + +; Shifts + +declare @llvm.aarch64.sve.asr.nxv16i8(, , ) +define @replace_asr_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_asr_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.asr.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.asr.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.asr.nxv8i16(, , ) +define @replace_asr_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_asr_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.asr.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.asr.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.asr.nxv4i32(, , ) +define @replace_asr_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_asr_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) 
#[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.asr.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.asr.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.asr.nxv2i64(, , ) +define @replace_asr_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_asr_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.asr.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.asr.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_asr_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_asr_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.asr.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.asr.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.lsl.nxv16i8(, , ) +define @replace_lsl_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsl_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.lsl.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.lsl.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.lsl.nxv8i16(, , ) +define @replace_lsl_intrinsic_i16( %a, %b) #0 { +; 
CHECK-LABEL: define @replace_lsl_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.lsl.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.lsl.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.lsl.nxv4i32(, , ) +define @replace_lsl_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsl_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.lsl.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.lsl.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.lsl.nxv2i64(, , ) +define @replace_lsl_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsl_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.lsl.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.lsl.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_lsl_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_lsl_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.lsl.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.lsl.nxv2i64( %1, %a, %b) + ret %2 +} + +declare 
@llvm.aarch64.sve.lsr.nxv16i8(, , ) +define @replace_lsr_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsr_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.lsr.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.lsr.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.lsr.nxv8i16(, , ) +define @replace_lsr_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsr_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.lsr.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.lsr.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.lsr.nxv4i32(, , ) +define @replace_lsr_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsr_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.lsr.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.lsr.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.lsr.nxv2i64(, , ) +define @replace_lsr_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_lsr_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.lsr.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call 
@llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.lsr.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_lsr_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_lsr_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.lsr.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.lsr.nxv2i64( %1, %a, %b) + ret %2 +} + +; Logical operations + +declare @llvm.aarch64.sve.and.nxv16i8(, , ) +define @replace_and_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_and_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.and.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.and.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.and.nxv8i16(, , ) +define @replace_and_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_and_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.and.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.and.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.and.nxv4i32(, , ) +define @replace_and_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_and_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call 
@llvm.aarch64.sve.and.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.and.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.and.nxv2i64(, , ) +define @replace_and_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_and_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.and.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.and.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_and_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_and_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.and.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.and.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.bic.nxv16i8(, , ) +define @replace_bic_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_bic_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.bic.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.bic.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.bic.nxv8i16(, , ) +define @replace_bic_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_bic_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.bic.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.bic.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.bic.nxv4i32(, , ) +define @replace_bic_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_bic_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.bic.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.bic.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.bic.nxv2i64(, , ) +define @replace_bic_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_bic_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.bic.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.bic.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_bic_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_bic_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.bic.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.bic.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.eor.nxv16i8(, , ) +define @replace_eor_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_eor_intrinsic_i8 +; CHECK-SAME: ( 
[[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.eor.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.eor.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.eor.nxv8i16(, , ) +define @replace_eor_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_eor_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.eor.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.eor.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.eor.nxv4i32(, , ) +define @replace_eor_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_eor_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.eor.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.eor.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.eor.nxv2i64(, , ) +define @replace_eor_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_eor_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.eor.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.eor.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_eor_intrinsic_i64( %a, %b) 
#0 { +; CHECK-LABEL: define @no_replace_eor_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.eor.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.eor.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.orr.nxv16i8(, , ) +define @replace_orr_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_orr_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.orr.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.orr.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.orr.nxv8i16(, , ) +define @replace_orr_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_orr_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.orr.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.orr.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.orr.nxv4i32(, , ) +define @replace_orr_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_orr_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.orr.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call 
@llvm.aarch64.sve.orr.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.orr.nxv2i64(, , ) +define @replace_orr_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_orr_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.orr.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.orr.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_orr_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_orr_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.orr.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.orr.nxv2i64( %1, %a, %b) + ret %2 +} + +; SVE2 - Uniform DSP operations + +declare @llvm.aarch64.sve.sqsub.nxv16i8(, , ) +define @replace_sqsub_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_sqsub_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.sqsub.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.sqsub.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.sqsub.nxv8i16(, , ) +define @replace_sqsub_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_sqsub_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.sqsub.u.nxv8i16( 
[[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.sqsub.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.sqsub.nxv4i32(, , ) +define @replace_sqsub_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_sqsub_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.sqsub.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.sqsub.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.sqsub.nxv2i64(, , ) +define @replace_sqsub_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_sqsub_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.sqsub.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.sqsub.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_sqsub_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @no_replace_sqsub_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqsub.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.sqsub.nxv2i64( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.uqsub.nxv16i8(, , ) +define @replace_uqsub_intrinsic_i8( %a, %b) #0 { +; CHECK-LABEL: define @replace_uqsub_intrinsic_i8 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail 
call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.uqsub.u.nxv16i8( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %2 = tail call @llvm.aarch64.sve.uqsub.nxv16i8( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.uqsub.nxv8i16(, , ) +define @replace_uqsub_intrinsic_i16( %a, %b) #0 { +; CHECK-LABEL: define @replace_uqsub_intrinsic_i16 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.uqsub.u.nxv8i16( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %2 = tail call @llvm.aarch64.sve.uqsub.nxv8i16( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.uqsub.nxv4i32(, , ) +define @replace_uqsub_intrinsic_i32( %a, %b) #0 { +; CHECK-LABEL: define @replace_uqsub_intrinsic_i32 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.uqsub.u.nxv4i32( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %2 = tail call @llvm.aarch64.sve.uqsub.nxv4i32( %1, %a, %b) + ret %2 +} + +declare @llvm.aarch64.sve.uqsub.nxv2i64(, , ) +define @replace_uqsub_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define @replace_uqsub_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.uqsub.u.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %2 = tail call @llvm.aarch64.sve.uqsub.nxv2i64( %1, %a, %b) + ret %2 +} + +define @no_replace_uqsub_intrinsic_i64( %a, %b) #0 { +; CHECK-LABEL: define 
@no_replace_uqsub_intrinsic_i64 +; CHECK-SAME: ( [[A:%.*]], [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.uqsub.nxv2i64( [[TMP1]], [[A]], [[B]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 5) + %2 = tail call @llvm.aarch64.sve.uqsub.nxv2i64( %1, %a, %b) + ret %2 +} + +attributes #0 = { "target-features"="+sve" }