Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7977,15 +7977,34 @@ return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags()); } + auto IsConstantNonZero = [](SDValue N) { + if (auto *Input = dyn_cast<ConstantFPSDNode>(N)) + return !Input->isZero(); + if (N->getOpcode() == ISD::BUILD_VECTOR || + N->getOpcode() == ISD::SPLAT_VECTOR) { + for (const SDValue &Op : N->op_values()) { + if (Op.isUndef()) + return false; + if (auto *Const = dyn_cast<ConstantFPSDNode>(Op)) + if (Const->isZero()) + return false; + } + return true; + } + return false; + }; // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that - // instead if there are no NaNs. - if (Node->getFlags().hasNoNaNs()) { + // instead if there are no NaNs and there can't be an incompatible zero + // compare: at least one operand isn't +/-0, or there are no signed-zeros. + if (Node->getFlags().hasNoNaNs() && + (Node->getFlags().hasNoSignedZeros() || + IsConstantNonZero(Node->getOperand(0)) || + IsConstantNonZero(Node->getOperand(1)))) { unsigned IEEE2018Op = Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM; - if (isOperationLegalOrCustom(IEEE2018Op, VT)) { + if (isOperationLegalOrCustom(IEEE2018Op, VT)) return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0), Node->getOperand(1), Node->getFlags()); - } } if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG)) Index: llvm/test/CodeGen/ARM/lower-vmax.ll =================================================================== --- llvm/test/CodeGen/ARM/lower-vmax.ll +++ llvm/test/CodeGen/ARM/lower-vmax.ll @@ -1,11 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=arm-eabihf -mattr=+neon < %s | FileCheck -check-prefixes=CHECK-NO_NEON %s ; RUN: llc -mtriple=arm-eabihf -mattr=+neon,+neonfp < %s | FileCheck -check-prefixes=CHECK-NEON %s define float @max_f32(float, float) { -;CHECK-NEON: vmax.f32 -;CHECK-NO_NEON: vcmp.f32 -;CHECK-NO_NEON: vmrs -;CHECK-NO_NEON: vmovgt.f32 +; CHECK-NO_NEON-LABEL: max_f32: +; CHECK-NO_NEON: @ %bb.0: +; CHECK-NO_NEON-NEXT: vcmp.f32 s1, s0 +; CHECK-NO_NEON-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NO_NEON-NEXT: vmovgt.f32 s0, s1 +; CHECK-NO_NEON-NEXT: mov pc, lr +; +; CHECK-NEON-LABEL: max_f32: +; CHECK-NEON: @ %bb.0: +; CHECK-NEON-NEXT: vcmp.f32 s1, s0 +; CHECK-NEON-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEON-NEXT: vmovgt.f32 s0, s1 +; CHECK-NEON-NEXT: mov pc, lr %3 = call nnan float @llvm.maxnum.f32(float %1, float %0) ret float %3 } @@ -13,10 +23,19 @@ declare float @llvm.maxnum.f32(float, float) #1 define float @min_f32(float, float) { -;CHECK-NEON: vmin.f32 -;CHECK-NO_NEON: vcmp.f32 -;CHECK-NO_NEON: vmrs -;CHECK-NO_NEON: vmovlt.f32 +; CHECK-NO_NEON-LABEL: min_f32: +; CHECK-NO_NEON: @ %bb.0: +; CHECK-NO_NEON-NEXT: vcmp.f32 s1, s0 +; CHECK-NO_NEON-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NO_NEON-NEXT: vmovlt.f32 s0, s1 +; CHECK-NO_NEON-NEXT: mov pc, lr +; +; CHECK-NEON-LABEL: min_f32: +; CHECK-NEON: @ %bb.0: +; CHECK-NEON-NEXT: vcmp.f32 s1, s0 +; CHECK-NEON-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEON-NEXT: vmovlt.f32 s0, s1 +; CHECK-NEON-NEXT: mov pc, lr %3 = call nnan float @llvm.minnum.f32(float %1, float %0) ret float %3 } Index: llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll @@ -0,0
+1,1458 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=armv7 -mattr=+neon %s -o - | FileCheck %s --check-prefix=ARMV7 +; RUN: llc -mtriple=armv8.2-a -mattr=+fp-armv8 %s -o - | FileCheck %s --check-prefix=ARMV8 +; RUN: llc -mtriple=armv8.1m-none-none-eabi -mattr=+mve.fp,+fp64 %s -o - | FileCheck %s --check-prefix=ARMV8M + +declare float @llvm.minnum.f32(float, float) +declare float @llvm.maxnum.f32(float, float) +declare double @llvm.minnum.f64(double, double) +declare double @llvm.maxnum.f64(double, double) +declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) +declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) +declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) + +define float @fminnum32_intrinsic(float %x, float %y) { +; ARMV7-LABEL: fminnum32_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov s0, r0 +; ARMV7-NEXT: vmov s2, r1 +; ARMV7-NEXT: vcmp.f32 s0, s2 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovlt.f32 s2, s0 +; ARMV7-NEXT: vmov r0, s2 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fminnum32_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov s0, r1 +; ARMV8-NEXT: vmov s2, r0 +; ARMV8-NEXT: vminnm.f32 s0, s2, s0 +; ARMV8-NEXT: vmov r0, s0 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fminnum32_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov s0, r1 +; ARMV8M-NEXT: vmov s2, r0 +; ARMV8M-NEXT: vminnm.f32 s0, s2, s0 +; ARMV8M-NEXT: vmov r0, s0 +; ARMV8M-NEXT: bx lr + %a = call nnan float @llvm.minnum.f32(float %x, float %y) + ret float %a +} + +define float @fminnum32_nsz_intrinsic(float %x, float %y) { +; ARMV7-LABEL: fminnum32_nsz_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov s0, r0 +; ARMV7-NEXT: vmov s2, r1 +; ARMV7-NEXT: vcmp.f32 s0, s2 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovlt.f32 s2, s0 +; ARMV7-NEXT: vmov r0, s2 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fminnum32_nsz_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov s0, r1 +; ARMV8-NEXT: vmov s2, r0 +; ARMV8-NEXT: vminnm.f32 s0, s2, s0 +; ARMV8-NEXT: vmov r0, s0 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fminnum32_nsz_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov s0, r1 +; ARMV8M-NEXT: vmov s2, r0 +; ARMV8M-NEXT: vminnm.f32 s0, s2, s0 +; ARMV8M-NEXT: vmov r0, s0 +; ARMV8M-NEXT: bx lr + %a = call nnan nsz float @llvm.minnum.f32(float %x, float %y) + ret float %a +} + +define float @fminnum32_non_zero_intrinsic(float %x) { +; ARMV7-LABEL: fminnum32_non_zero_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov.f32 s0, #-1.000000e+00 +; ARMV7-NEXT: vmov s2, r0 +; ARMV7-NEXT: vcmp.f32 s2, s0 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovlt.f32 s0, s2 +; ARMV7-NEXT: vmov r0, s0 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fminnum32_non_zero_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov.f32 s0, #-1.000000e+00 +; ARMV8-NEXT: vmov s2, r0 +; ARMV8-NEXT: vminnm.f32 s0, s2, s0 +; ARMV8-NEXT: vmov r0, s0 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fminnum32_non_zero_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov.f32 s0, #-1.000000e+00 +; ARMV8M-NEXT: vmov s2, r0 +; ARMV8M-NEXT: vminnm.f32 s0, s2, s0 +; ARMV8M-NEXT: vmov r0, s0 +; ARMV8M-NEXT: bx lr + %a = call nnan float @llvm.minnum.f32(float %x, float -1.0) + ret float %a +} + +define float @fmaxnum32_intrinsic(float %x, float %y) { +; ARMV7-LABEL: fmaxnum32_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov s0, r0 +; ARMV7-NEXT: vmov s2, r1 +; ARMV7-NEXT: vcmp.f32 s0, s2 +; 
ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovgt.f32 s2, s0 +; ARMV7-NEXT: vmov r0, s2 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fmaxnum32_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov s0, r1 +; ARMV8-NEXT: vmov s2, r0 +; ARMV8-NEXT: vmaxnm.f32 s0, s2, s0 +; ARMV8-NEXT: vmov r0, s0 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fmaxnum32_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov s0, r1 +; ARMV8M-NEXT: vmov s2, r0 +; ARMV8M-NEXT: vmaxnm.f32 s0, s2, s0 +; ARMV8M-NEXT: vmov r0, s0 +; ARMV8M-NEXT: bx lr + %a = call nnan float @llvm.maxnum.f32(float %x, float %y) + ret float %a +} + +define float @fmaxnum32_nsz_intrinsic(float %x, float %y) { +; ARMV7-LABEL: fmaxnum32_nsz_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov s0, r0 +; ARMV7-NEXT: vmov s2, r1 +; ARMV7-NEXT: vcmp.f32 s0, s2 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovgt.f32 s2, s0 +; ARMV7-NEXT: vmov r0, s2 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fmaxnum32_nsz_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov s0, r1 +; ARMV8-NEXT: vmov s2, r0 +; ARMV8-NEXT: vmaxnm.f32 s0, s2, s0 +; ARMV8-NEXT: vmov r0, s0 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fmaxnum32_nsz_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov s0, r1 +; ARMV8M-NEXT: vmov s2, r0 +; ARMV8M-NEXT: vmaxnm.f32 s0, s2, s0 +; ARMV8M-NEXT: vmov r0, s0 +; ARMV8M-NEXT: bx lr + %a = call nnan nsz float @llvm.maxnum.f32(float %x, float %y) + ret float %a +} + +define float @fmaxnum32_zero_intrinsic(float %x) { +; ARMV7-LABEL: fmaxnum32_zero_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov s2, r0 +; ARMV7-NEXT: vldr s0, .LCPI5_0 +; ARMV7-NEXT: vcmp.f32 s2, #0 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovgt.f32 s0, s2 +; ARMV7-NEXT: vmov r0, s0 +; ARMV7-NEXT: bx lr +; ARMV7-NEXT: .p2align 2 +; ARMV7-NEXT: @ %bb.1: +; ARMV7-NEXT: .LCPI5_0: +; ARMV7-NEXT: .long 0x00000000 @ float 0 +; +; ARMV8-LABEL: fmaxnum32_zero_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vldr s0, .LCPI5_0 +; ARMV8-NEXT: vmov s2, r0 +; ARMV8-NEXT: vmaxnm.f32 s0, s2, s0 +; ARMV8-NEXT: vmov r0, s0 +; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: .p2align 2 +; ARMV8-NEXT: @ %bb.1: +; ARMV8-NEXT: .LCPI5_0: +; ARMV8-NEXT: .long 0x00000000 @ float 0 +; +; ARMV8M-LABEL: fmaxnum32_zero_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vldr s0, .LCPI5_0 +; ARMV8M-NEXT: vmov s2, r0 +; ARMV8M-NEXT: vmaxnm.f32 s0, s2, s0 +; ARMV8M-NEXT: vmov r0, s0 +; ARMV8M-NEXT: bx lr +; ARMV8M-NEXT: .p2align 2 +; ARMV8M-NEXT: @ %bb.1: +; ARMV8M-NEXT: .LCPI5_0: +; ARMV8M-NEXT: .long 0x00000000 @ float 0 + %a = call nnan float @llvm.maxnum.f32(float %x, float 0.0) + ret float %a +} + +define float @fmaxnum32_non_zero_intrinsic(float %x) { +; ARMV7-LABEL: fmaxnum32_non_zero_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov.f32 s0, #1.000000e+00 +; ARMV7-NEXT: vmov s2, r0 +; ARMV7-NEXT: vcmp.f32 s2, s0 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovgt.f32 s0, s2 +; ARMV7-NEXT: vmov r0, s0 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fmaxnum32_non_zero_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov.f32 s0, #1.000000e+00 +; ARMV8-NEXT: vmov s2, r0 +; ARMV8-NEXT: vmaxnm.f32 s0, s2, s0 +; ARMV8-NEXT: vmov r0, s0 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fmaxnum32_non_zero_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov.f32 s0, #1.000000e+00 +; ARMV8M-NEXT: vmov s2, r0 +; ARMV8M-NEXT: vmaxnm.f32 s0, s2, s0 +; ARMV8M-NEXT: vmov r0, s0 +; ARMV8M-NEXT: bx lr + %a = call nnan float @llvm.maxnum.f32(float %x, float 1.0) + ret float %a +} + +define double @fminnum64_intrinsic(double %x, double %y) { +; 
ARMV7-LABEL: fminnum64_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov d16, r2, r3 +; ARMV7-NEXT: vmov d17, r0, r1 +; ARMV7-NEXT: vcmp.f64 d17, d16 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovlt.f64 d16, d17 +; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fminnum64_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov d16, r2, r3 +; ARMV8-NEXT: vmov d17, r0, r1 +; ARMV8-NEXT: vminnm.f64 d16, d17, d16 +; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fminnum64_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov d0, r2, r3 +; ARMV8M-NEXT: vmov d1, r0, r1 +; ARMV8M-NEXT: vminnm.f64 d0, d1, d0 +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: bx lr + %a = call nnan double @llvm.minnum.f64(double %x, double %y) + ret double %a +} + +define double @fminnum64_nsz_intrinsic(double %x, double %y) { +; ARMV7-LABEL: fminnum64_nsz_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov d16, r2, r3 +; ARMV7-NEXT: vmov d17, r0, r1 +; ARMV7-NEXT: vcmp.f64 d17, d16 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovlt.f64 d16, d17 +; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fminnum64_nsz_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov d16, r2, r3 +; ARMV8-NEXT: vmov d17, r0, r1 +; ARMV8-NEXT: vminnm.f64 d16, d17, d16 +; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fminnum64_nsz_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov d0, r2, r3 +; ARMV8M-NEXT: vmov d1, r0, r1 +; ARMV8M-NEXT: vminnm.f64 d0, d1, d0 +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: bx lr + %a = call nnan nsz double @llvm.minnum.f64(double %x, double %y) + ret double %a +} + +define double @fminnum64_zero_intrinsic(double %x) { +; ARMV7-LABEL: fminnum64_zero_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vldr d16, .LCPI9_0 +; ARMV7-NEXT: vmov d17, r0, r1 +; ARMV7-NEXT: vcmp.f64 d17, d16 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovlt.f64 d16, d17 +; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: bx lr +; ARMV7-NEXT: .p2align 3 +; ARMV7-NEXT: @ %bb.1: +; ARMV7-NEXT: .LCPI9_0: +; ARMV7-NEXT: .long 0 @ double -0 +; ARMV7-NEXT: .long 2147483648 +; +; ARMV8-LABEL: fminnum64_zero_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vldr d16, .LCPI9_0 +; ARMV8-NEXT: vmov d17, r0, r1 +; ARMV8-NEXT: vminnm.f64 d16, d17, d16 +; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: .p2align 3 +; ARMV8-NEXT: @ %bb.1: +; ARMV8-NEXT: .LCPI9_0: +; ARMV8-NEXT: .long 0 @ double -0 +; ARMV8-NEXT: .long 2147483648 +; +; ARMV8M-LABEL: fminnum64_zero_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vldr d0, .LCPI9_0 +; ARMV8M-NEXT: vmov d1, r0, r1 +; ARMV8M-NEXT: vminnm.f64 d0, d1, d0 +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: bx lr +; ARMV8M-NEXT: .p2align 3 +; ARMV8M-NEXT: @ %bb.1: +; ARMV8M-NEXT: .LCPI9_0: +; ARMV8M-NEXT: .long 0 @ double -0 +; ARMV8M-NEXT: .long 2147483648 + %a = call nnan double @llvm.minnum.f64(double %x, double -0.0) + ret double %a +} + +define double @fminnum64_non_zero_intrinsic(double %x) { +; ARMV7-LABEL: fminnum64_non_zero_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov.f64 d16, #-1.000000e+00 +; ARMV7-NEXT: vmov d17, r0, r1 +; ARMV7-NEXT: vcmp.f64 d17, d16 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovlt.f64 d16, d17 +; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fminnum64_non_zero_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov.f64 d16, #-1.000000e+00 +; ARMV8-NEXT: vmov d17, r0, r1 +; ARMV8-NEXT: vminnm.f64 d16, d17, d16 +; ARMV8-NEXT: vmov r0, r1, 
d16 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fminnum64_non_zero_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov.f64 d0, #-1.000000e+00 +; ARMV8M-NEXT: vmov d1, r0, r1 +; ARMV8M-NEXT: vminnm.f64 d0, d1, d0 +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: bx lr + %a = call nnan double @llvm.minnum.f64(double %x, double -1.0) + ret double %a +} + +define double@fmaxnum64_intrinsic(double %x, double %y) { +; ARMV7-LABEL: fmaxnum64_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov d16, r2, r3 +; ARMV7-NEXT: vmov d17, r0, r1 +; ARMV7-NEXT: vcmp.f64 d17, d16 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovgt.f64 d16, d17 +; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fmaxnum64_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov d16, r2, r3 +; ARMV8-NEXT: vmov d17, r0, r1 +; ARMV8-NEXT: vmaxnm.f64 d16, d17, d16 +; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fmaxnum64_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov d0, r2, r3 +; ARMV8M-NEXT: vmov d1, r0, r1 +; ARMV8M-NEXT: vmaxnm.f64 d0, d1, d0 +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: bx lr + %a = call nnan double @llvm.maxnum.f64(double %x, double %y) + ret double %a +} + +define double@fmaxnum64_nsz_intrinsic(double %x, double %y) { +; ARMV7-LABEL: fmaxnum64_nsz_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov d16, r2, r3 +; ARMV7-NEXT: vmov d17, r0, r1 +; ARMV7-NEXT: vcmp.f64 d17, d16 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovgt.f64 d16, d17 +; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fmaxnum64_nsz_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov d16, r2, r3 +; ARMV8-NEXT: vmov d17, r0, r1 +; ARMV8-NEXT: vmaxnm.f64 d16, d17, d16 +; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fmaxnum64_nsz_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov d0, r2, r3 +; ARMV8M-NEXT: vmov d1, r0, r1 +; ARMV8M-NEXT: vmaxnm.f64 d0, d1, d0 +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: bx lr + %a = call nnan nsz double @llvm.maxnum.f64(double %x, double %y) + ret double %a +} + +define double @fmaxnum64_zero_intrinsic(double %x) { +; ARMV7-LABEL: fmaxnum64_zero_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov d17, r0, r1 +; ARMV7-NEXT: vcmp.f64 d17, #0 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov.i32 d16, #0x0 +; ARMV7-NEXT: vmovgt.f64 d16, d17 +; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fmaxnum64_zero_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vldr d16, .LCPI13_0 +; ARMV8-NEXT: vmov d17, r0, r1 +; ARMV8-NEXT: vmaxnm.f64 d16, d17, d16 +; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: .p2align 3 +; ARMV8-NEXT: @ %bb.1: +; ARMV8-NEXT: .LCPI13_0: +; ARMV8-NEXT: .long 0 @ double 0 +; ARMV8-NEXT: .long 0 +; +; ARMV8M-LABEL: fmaxnum64_zero_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vldr d0, .LCPI13_0 +; ARMV8M-NEXT: vmov d1, r0, r1 +; ARMV8M-NEXT: vmaxnm.f64 d0, d1, d0 +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: bx lr +; ARMV8M-NEXT: .p2align 3 +; ARMV8M-NEXT: @ %bb.1: +; ARMV8M-NEXT: .LCPI13_0: +; ARMV8M-NEXT: .long 0 @ double 0 +; ARMV8M-NEXT: .long 0 + %a = call nnan double @llvm.maxnum.f64(double %x, double 0.0) + ret double %a +} + +define double @fmaxnum64_non_zero_intrinsic(double %x) { +; ARMV7-LABEL: fmaxnum64_non_zero_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov.f64 d16, #1.000000e+00 +; ARMV7-NEXT: vmov d17, r0, r1 +; ARMV7-NEXT: vcmp.f64 d17, d16 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovgt.f64 d16, d17 +; 
ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fmaxnum64_non_zero_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov.f64 d16, #1.000000e+00 +; ARMV8-NEXT: vmov d17, r0, r1 +; ARMV8-NEXT: vmaxnm.f64 d16, d17, d16 +; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fmaxnum64_non_zero_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov.f64 d0, #1.000000e+00 +; ARMV8M-NEXT: vmov d1, r0, r1 +; ARMV8M-NEXT: vmaxnm.f64 d0, d1, d0 +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: bx lr + %a = call nnan double @llvm.maxnum.f64(double %x, double 1.0) + ret double %a +} + +define <4 x float> @fminnumv432_intrinsic(<4 x float> %x, <4 x float> %y) { +; ARMV7-LABEL: fminnumv432_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: mov r12, sp +; ARMV7-NEXT: vld1.64 {d0, d1}, [r12] +; ARMV7-NEXT: vmov d3, r2, r3 +; ARMV7-NEXT: vmov d2, r0, r1 +; ARMV7-NEXT: vcmp.f32 s7, s3 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vcmp.f32 s6, s2 +; ARMV7-NEXT: vmovlt.f32 s3, s7 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vcmp.f32 s5, s1 +; ARMV7-NEXT: vmovlt.f32 s2, s6 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vcmp.f32 s4, s0 +; ARMV7-NEXT: vmovlt.f32 s1, s5 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovlt.f32 s0, s4 +; ARMV7-NEXT: vmov r2, r3, d1 +; ARMV7-NEXT: vmov r0, r1, d0 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fminnumv432_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vldr s0, [sp, #4] +; ARMV8-NEXT: vmov s12, r1 +; ARMV8-NEXT: vldr s2, [sp, #8] +; ARMV8-NEXT: vmov s10, r2 +; ARMV8-NEXT: vminnm.f32 s0, s12, s0 +; ARMV8-NEXT: vldr s4, [sp, #12] +; ARMV8-NEXT: vldr s6, [sp] +; ARMV8-NEXT: vmov s14, r0 +; ARMV8-NEXT: vmov r1, s0 +; ARMV8-NEXT: vminnm.f32 s0, s10, s2 +; ARMV8-NEXT: vmov s8, r3 +; ARMV8-NEXT: vminnm.f32 s6, s14, s6 +; ARMV8-NEXT: vmov r2, s0 +; ARMV8-NEXT: vminnm.f32 s0, s8, s4 +; ARMV8-NEXT: vmov r0, s6 +; ARMV8-NEXT: vmov r3, s0 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fminnumv432_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov d0, r0, r1 +; ARMV8M-NEXT: mov r0, sp +; ARMV8M-NEXT: vldrw.u32 q1, [r0] +; ARMV8M-NEXT: vmov d1, r2, r3 +; ARMV8M-NEXT: vminnm.f32 q0, q0, q1 +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: vmov r2, r3, d1 +; ARMV8M-NEXT: bx lr + %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) + ret <4 x float> %a +} + +define <4 x float> @fminnumv432_nsz_intrinsic(<4 x float> %x, <4 x float> %y) { +; ARMV7-LABEL: fminnumv432_nsz_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov d17, r2, r3 +; ARMV7-NEXT: vmov d16, r0, r1 +; ARMV7-NEXT: mov r0, sp +; ARMV7-NEXT: vld1.64 {d18, d19}, [r0] +; ARMV7-NEXT: vmin.f32 q8, q8, q9 +; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: vmov r2, r3, d17 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fminnumv432_nsz_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vldr s0, [sp, #4] +; ARMV8-NEXT: vmov s12, r1 +; ARMV8-NEXT: vldr s2, [sp, #8] +; ARMV8-NEXT: vmov s10, r2 +; ARMV8-NEXT: vminnm.f32 s0, s12, s0 +; ARMV8-NEXT: vldr s4, [sp, #12] +; ARMV8-NEXT: vldr s6, [sp] +; ARMV8-NEXT: vmov s14, r0 +; ARMV8-NEXT: vmov r1, s0 +; ARMV8-NEXT: vminnm.f32 s0, s10, s2 +; ARMV8-NEXT: vmov s8, r3 +; ARMV8-NEXT: vminnm.f32 s6, s14, s6 +; ARMV8-NEXT: vmov r2, s0 +; ARMV8-NEXT: vminnm.f32 s0, s8, s4 +; ARMV8-NEXT: vmov r0, s6 +; ARMV8-NEXT: vmov r3, s0 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fminnumv432_nsz_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov d0, r0, r1 +; ARMV8M-NEXT: mov r0, sp +; ARMV8M-NEXT: vldrw.u32 q1, [r0] +; ARMV8M-NEXT: vmov d1, r2, r3 +; 
ARMV8M-NEXT: vminnm.f32 q0, q0, q1 +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: vmov r2, r3, d1 +; ARMV8M-NEXT: bx lr + %a = call nnan nsz <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) + ret <4 x float> %a +} + +define <4 x float> @fminnumv432_non_zero_intrinsic(<4 x float> %x) { +; ARMV7-LABEL: fminnumv432_non_zero_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov d19, r2, r3 +; ARMV7-NEXT: vmov.f32 q8, #-1.000000e+00 +; ARMV7-NEXT: vmov d18, r0, r1 +; ARMV7-NEXT: vmin.f32 q8, q9, q8 +; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: vmov r2, r3, d17 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fminnumv432_non_zero_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov.f32 s0, #-1.000000e+00 +; ARMV8-NEXT: vmov s4, r2 +; ARMV8-NEXT: vmov s6, r1 +; ARMV8-NEXT: vminnm.f32 s4, s4, s0 +; ARMV8-NEXT: vmov s8, r0 +; ARMV8-NEXT: vminnm.f32 s6, s6, s0 +; ARMV8-NEXT: vmov s2, r3 +; ARMV8-NEXT: vminnm.f32 s8, s8, s0 +; ARMV8-NEXT: vminnm.f32 s0, s2, s0 +; ARMV8-NEXT: vmov r0, s8 +; ARMV8-NEXT: vmov r1, s6 +; ARMV8-NEXT: vmov r2, s4 +; ARMV8-NEXT: vmov r3, s0 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fminnumv432_non_zero_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov d1, r2, r3 +; ARMV8M-NEXT: vmov.f32 q1, #-1.000000e+00 +; ARMV8M-NEXT: vmov d0, r0, r1 +; ARMV8M-NEXT: vminnm.f32 q0, q0, q1 +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: vmov r2, r3, d1 +; ARMV8M-NEXT: bx lr + %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>) + ret <4 x float> %a +} + +define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) { +; ARMV7-LABEL: fminnumv432_one_zero_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov d3, r2, r3 +; ARMV7-NEXT: vmov d2, r0, r1 +; ARMV7-NEXT: vmov.f32 s0, #-1.000000e+00 +; ARMV7-NEXT: vcmp.f32 s5, #0 +; ARMV7-NEXT: vldr s1, .LCPI18_0 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vcmp.f32 s7, s0 +; ARMV7-NEXT: vmovlt.f32 s1, s5 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov.f32 s3, s0 +; ARMV7-NEXT: vcmp.f32 s6, s0 +; ARMV7-NEXT: vmovlt.f32 s3, s7 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov.f32 s2, s0 +; ARMV7-NEXT: vcmp.f32 s4, s0 +; ARMV7-NEXT: vmovlt.f32 s2, s6 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovlt.f32 s0, s4 +; ARMV7-NEXT: vmov r2, r3, d1 +; ARMV7-NEXT: vmov r0, r1, d0 +; ARMV7-NEXT: bx lr +; ARMV7-NEXT: .p2align 2 +; ARMV7-NEXT: @ %bb.1: +; ARMV7-NEXT: .LCPI18_0: +; ARMV7-NEXT: .long 0x00000000 @ float 0 +; +; ARMV8-LABEL: fminnumv432_one_zero_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vldr s0, .LCPI18_0 +; ARMV8-NEXT: vmov s8, r1 +; ARMV8-NEXT: vmov.f32 s2, #-1.000000e+00 +; ARMV8-NEXT: vminnm.f32 s0, s8, s0 +; ARMV8-NEXT: vmov s6, r2 +; ARMV8-NEXT: vmov s10, r0 +; ARMV8-NEXT: vmov r1, s0 +; ARMV8-NEXT: vminnm.f32 s0, s6, s2 +; ARMV8-NEXT: vmov s4, r3 +; ARMV8-NEXT: vminnm.f32 s10, s10, s2 +; ARMV8-NEXT: vmov r2, s0 +; ARMV8-NEXT: vminnm.f32 s0, s4, s2 +; ARMV8-NEXT: vmov r0, s10 +; ARMV8-NEXT: vmov r3, s0 +; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: .p2align 2 +; ARMV8-NEXT: @ %bb.1: +; ARMV8-NEXT: .LCPI18_0: +; ARMV8-NEXT: .long 0x00000000 @ float 0 +; +; ARMV8M-LABEL: fminnumv432_one_zero_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov d0, r0, r1 +; ARMV8M-NEXT: adr r0, .LCPI18_0 +; ARMV8M-NEXT: vldrw.u32 q1, [r0] +; ARMV8M-NEXT: vmov d1, r2, r3 +; ARMV8M-NEXT: vminnm.f32 q0, q0, q1 +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: vmov r2, r3, d1 +; ARMV8M-NEXT: bx lr +; ARMV8M-NEXT: .p2align 4 +; ARMV8M-NEXT: @ %bb.1: +; ARMV8M-NEXT: .LCPI18_0: +; ARMV8M-NEXT: .long 0xbf800000 @
float -1 +; ARMV8M-NEXT: .long 0x00000000 @ float 0 +; ARMV8M-NEXT: .long 0xbf800000 @ float -1 +; ARMV8M-NEXT: .long 0xbf800000 @ float -1 + %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> <float -1.0, float 0.0, float -1.0, float -1.0>) + ret <4 x float> %a +} + +define <4 x float> @fmaxnumv432_intrinsic(<4 x float> %x, <4 x float> %y) { +; ARMV7-LABEL: fmaxnumv432_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: mov r12, sp +; ARMV7-NEXT: vld1.64 {d0, d1}, [r12] +; ARMV7-NEXT: vmov d3, r2, r3 +; ARMV7-NEXT: vmov d2, r0, r1 +; ARMV7-NEXT: vcmp.f32 s7, s3 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vcmp.f32 s6, s2 +; ARMV7-NEXT: vmovgt.f32 s3, s7 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vcmp.f32 s5, s1 +; ARMV7-NEXT: vmovgt.f32 s2, s6 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vcmp.f32 s4, s0 +; ARMV7-NEXT: vmovgt.f32 s1, s5 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovgt.f32 s0, s4 +; ARMV7-NEXT: vmov r2, r3, d1 +; ARMV7-NEXT: vmov r0, r1, d0 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fmaxnumv432_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vldr s0, [sp, #4] +; ARMV8-NEXT: vmov s12, r1 +; ARMV8-NEXT: vldr s2, [sp, #8] +; ARMV8-NEXT: vmov s10, r2 +; ARMV8-NEXT: vmaxnm.f32 s0, s12, s0 +; ARMV8-NEXT: vldr s4, [sp, #12] +; ARMV8-NEXT: vldr s6, [sp] +; ARMV8-NEXT: vmov s14, r0 +; ARMV8-NEXT: vmov r1, s0 +; ARMV8-NEXT: vmaxnm.f32 s0, s10, s2 +; ARMV8-NEXT: vmov s8, r3 +; ARMV8-NEXT: vmaxnm.f32 s6, s14, s6 +; ARMV8-NEXT: vmov r2, s0 +; ARMV8-NEXT: vmaxnm.f32 s0, s8, s4 +; ARMV8-NEXT: vmov r0, s6 +; ARMV8-NEXT: vmov r3, s0 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fmaxnumv432_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov d0, r0, r1 +; ARMV8M-NEXT: mov r0, sp +; ARMV8M-NEXT: vldrw.u32 q1, [r0] +; ARMV8M-NEXT: vmov d1, r2, r3 +; ARMV8M-NEXT: vmaxnm.f32 q0, q0, q1 +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: vmov r2, r3, d1 +; ARMV8M-NEXT: bx lr + %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) + ret <4 x float> %a +} + +define <4 x float> @fmaxnumv432_nsz_intrinsic(<4 x float> %x, <4 x float> %y) { +; ARMV7-LABEL: fmaxnumv432_nsz_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov d17, r2, r3 +; ARMV7-NEXT: vmov d16, r0, r1 +; ARMV7-NEXT: mov r0, sp +; ARMV7-NEXT: vld1.64 {d18, d19}, [r0] +; ARMV7-NEXT: vmax.f32 q8, q8, q9 +; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: vmov r2, r3, d17 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fmaxnumv432_nsz_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vldr s0, [sp, #4] +; ARMV8-NEXT: vmov s12, r1 +; ARMV8-NEXT: vldr s2, [sp, #8] +; ARMV8-NEXT: vmov s10, r2 +; ARMV8-NEXT: vmaxnm.f32 s0, s12, s0 +; ARMV8-NEXT: vldr s4, [sp, #12] +; ARMV8-NEXT: vldr s6, [sp] +; ARMV8-NEXT: vmov s14, r0 +; ARMV8-NEXT: vmov r1, s0 +; ARMV8-NEXT: vmaxnm.f32 s0, s10, s2 +; ARMV8-NEXT: vmov s8, r3 +; ARMV8-NEXT: vmaxnm.f32 s6, s14, s6 +; ARMV8-NEXT: vmov r2, s0 +; ARMV8-NEXT: vmaxnm.f32 s0, s8, s4 +; ARMV8-NEXT: vmov r0, s6 +; ARMV8-NEXT: vmov r3, s0 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fmaxnumv432_nsz_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov d0, r0, r1 +; ARMV8M-NEXT: mov r0, sp +; ARMV8M-NEXT: vldrw.u32 q1, [r0] +; ARMV8M-NEXT: vmov d1, r2, r3 +; ARMV8M-NEXT: vmaxnm.f32 q0, q0, q1 +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: vmov r2, r3, d1 +; ARMV8M-NEXT: bx lr + %a = call nnan nsz <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) + ret <4 x float> %a +} + +define <4 x float> @fmaxnumv432_zero_intrinsic(<4 x float> %x) { +; ARMV7-LABEL: fmaxnumv432_zero_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT:
vmov d3, r2, r3 +; ARMV7-NEXT: vldr s0, .LCPI21_0 +; ARMV7-NEXT: vmov d2, r0, r1 +; ARMV7-NEXT: vcmp.f32 s7, #0 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov.f32 s3, s0 +; ARMV7-NEXT: vcmp.f32 s6, #0 +; ARMV7-NEXT: vmovgt.f32 s3, s7 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov.f32 s2, s0 +; ARMV7-NEXT: vcmp.f32 s5, #0 +; ARMV7-NEXT: vmovgt.f32 s2, s6 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov.f32 s1, s0 +; ARMV7-NEXT: vcmp.f32 s4, #0 +; ARMV7-NEXT: vmovgt.f32 s1, s5 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovgt.f32 s0, s4 +; ARMV7-NEXT: vmov r2, r3, d1 +; ARMV7-NEXT: vmov r0, r1, d0 +; ARMV7-NEXT: bx lr +; ARMV7-NEXT: .p2align 2 +; ARMV7-NEXT: @ %bb.1: +; ARMV7-NEXT: .LCPI21_0: +; ARMV7-NEXT: .long 0x00000000 @ float 0 +; +; ARMV8-LABEL: fmaxnumv432_zero_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vldr s0, .LCPI21_0 +; ARMV8-NEXT: vmov s4, r2 +; ARMV8-NEXT: vmov s6, r1 +; ARMV8-NEXT: vmov s8, r0 +; ARMV8-NEXT: vmaxnm.f32 s6, s6, s0 +; ARMV8-NEXT: vmov s2, r3 +; ARMV8-NEXT: vmaxnm.f32 s8, s8, s0 +; ARMV8-NEXT: vmaxnm.f32 s4, s4, s0 +; ARMV8-NEXT: vmaxnm.f32 s0, s2, s0 +; ARMV8-NEXT: vmov r0, s8 +; ARMV8-NEXT: vmov r1, s6 +; ARMV8-NEXT: vmov r2, s4 +; ARMV8-NEXT: vmov r3, s0 +; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: .p2align 2 +; ARMV8-NEXT: @ %bb.1: +; ARMV8-NEXT: .LCPI21_0: +; ARMV8-NEXT: .long 0x00000000 @ float 0 +; +; ARMV8M-LABEL: fmaxnumv432_zero_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov d1, r2, r3 +; ARMV8M-NEXT: vmov.i32 q1, #0x0 +; ARMV8M-NEXT: vmov d0, r0, r1 +; ARMV8M-NEXT: vmaxnm.f32 q0, q0, q1 +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: vmov r2, r3, d1 +; ARMV8M-NEXT: bx lr + %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>) + ret <4 x float> %a +} + +define <4 x float> @fmaxnumv432_minus_zero_intrinsic(<4 x float> %x) { +; ARMV7-LABEL: fmaxnumv432_minus_zero_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vldr s0, .LCPI22_0 +; ARMV7-NEXT: vmov d3, r2, r3 +; ARMV7-NEXT: vmov d2, r0, r1 +; ARMV7-NEXT: vcmp.f32 s7, s0 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov.f32 s3, s0 +; ARMV7-NEXT: vcmp.f32 s6, s0 +; ARMV7-NEXT: vmovgt.f32 s3, s7 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov.f32 s2, s0 +; ARMV7-NEXT: vcmp.f32 s5, s0 +; ARMV7-NEXT: vmovgt.f32 s2, s6 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov.f32 s1, s0 +; ARMV7-NEXT: vcmp.f32 s4, s0 +; ARMV7-NEXT: vmovgt.f32 s1, s5 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmovgt.f32 s0, s4 +; ARMV7-NEXT: vmov r2, r3, d1 +; ARMV7-NEXT: vmov r0, r1, d0 +; ARMV7-NEXT: bx lr +; ARMV7-NEXT: .p2align 2 +; ARMV7-NEXT: @ %bb.1: +; ARMV7-NEXT: .LCPI22_0: +; ARMV7-NEXT: .long 0x80000000 @ float -0 +; +; ARMV8-LABEL: fmaxnumv432_minus_zero_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vldr s0, .LCPI22_0 +; ARMV8-NEXT: vmov s4, r2 +; ARMV8-NEXT: vmov s6, r1 +; ARMV8-NEXT: vmov s8, r0 +; ARMV8-NEXT: vmaxnm.f32 s6, s6, s0 +; ARMV8-NEXT: vmov s2, r3 +; ARMV8-NEXT: vmaxnm.f32 s8, s8, s0 +; ARMV8-NEXT: vmaxnm.f32 s4, s4, s0 +; ARMV8-NEXT: vmaxnm.f32 s0, s2, s0 +; ARMV8-NEXT: vmov r0, s8 +; ARMV8-NEXT: vmov r1, s6 +; ARMV8-NEXT: vmov r2, s4 +; ARMV8-NEXT: vmov r3, s0 +; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: .p2align 2 +; ARMV8-NEXT: @ %bb.1: +; ARMV8-NEXT: .LCPI22_0: +; ARMV8-NEXT: .long 0x80000000 @ float -0 +; +; ARMV8M-LABEL: fmaxnumv432_minus_zero_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov d1, r2, r3 +; ARMV8M-NEXT: vmov.i32 q1, #0x80000000 +; ARMV8M-NEXT: vmov d0, r0, r1 +; ARMV8M-NEXT: vmaxnm.f32 q0, q0, q1 +;
ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: vmov r2, r3, d1 +; ARMV8M-NEXT: bx lr + %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>) + ret <4 x float> %a +} + +define <4 x float> @fmaxnumv432_non_zero_intrinsic(<4 x float> %x) { +; ARMV7-LABEL: fmaxnumv432_non_zero_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov d19, r2, r3 +; ARMV7-NEXT: vmov.f32 q8, #1.000000e+00 +; ARMV7-NEXT: vmov d18, r0, r1 +; ARMV7-NEXT: vmax.f32 q8, q9, q8 +; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: vmov r2, r3, d17 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fmaxnumv432_non_zero_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov.f32 s0, #1.000000e+00 +; ARMV8-NEXT: vmov s4, r2 +; ARMV8-NEXT: vmov s6, r1 +; ARMV8-NEXT: vmaxnm.f32 s4, s4, s0 +; ARMV8-NEXT: vmov s8, r0 +; ARMV8-NEXT: vmaxnm.f32 s6, s6, s0 +; ARMV8-NEXT: vmov s2, r3 +; ARMV8-NEXT: vmaxnm.f32 s8, s8, s0 +; ARMV8-NEXT: vmaxnm.f32 s0, s2, s0 +; ARMV8-NEXT: vmov r0, s8 +; ARMV8-NEXT: vmov r1, s6 +; ARMV8-NEXT: vmov r2, s4 +; ARMV8-NEXT: vmov r3, s0 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fmaxnumv432_non_zero_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov d1, r2, r3 +; ARMV8M-NEXT: vmov.f32 q1, #1.000000e+00 +; ARMV8M-NEXT: vmov d0, r0, r1 +; ARMV8M-NEXT: vmaxnm.f32 q0, q0, q1 +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: vmov r2, r3, d1 +; ARMV8M-NEXT: bx lr + %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>) + ret <4 x float> %a +} + +define <2 x double> @fminnumv264_intrinsic(<2 x double> %x, <2 x double> %y) { +; ARMV7-LABEL: fminnumv264_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: mov r12, sp +; ARMV7-NEXT: vld1.64 {d16, d17}, [r12] +; ARMV7-NEXT: vmov d18, r0, r1 +; ARMV7-NEXT: vcmp.f64 d18, d16 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov d19, r2, r3 +; ARMV7-NEXT: vcmp.f64 d19, d17 +; ARMV7-NEXT: vmovlt.f64 d16, d18 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: vmovlt.f64 d17, d19 +; ARMV7-NEXT: vmov r2, r3, d17 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fminnumv264_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vldr d16, [sp, #8] +; ARMV8-NEXT: vmov d18, r2, r3 +; ARMV8-NEXT: vldr d17, [sp] +; ARMV8-NEXT: vmov d19, r0, r1 +; ARMV8-NEXT: vminnm.f64 d16, d18, d16 +; ARMV8-NEXT: vminnm.f64 d17, d19, d17 +; ARMV8-NEXT: vmov r2, r3, d16 +; ARMV8-NEXT: vmov r0, r1, d17 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fminnumv264_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: mov r12, sp +; ARMV8M-NEXT: vmov d0, r0, r1 +; ARMV8M-NEXT: vldrw.u32 q1, [r12] +; ARMV8M-NEXT: vmov d1, r2, r3 +; ARMV8M-NEXT: vcmp.f64 d2, d0 +; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8M-NEXT: vcmp.f64 d3, d1 +; ARMV8M-NEXT: vselgt.f64 d0, d0, d2 +; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: vselgt.f64 d1, d1, d3 +; ARMV8M-NEXT: vmov r2, r3, d1 +; ARMV8M-NEXT: bx lr + %a = call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> %y) + ret <2 x double> %a +} + +define <2 x double> @fminnumv264_nsz_intrinsic(<2 x double> %x, <2 x double> %y) { +; ARMV7-LABEL: fminnumv264_nsz_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: mov r12, sp +; ARMV7-NEXT: vld1.64 {d16, d17}, [r12] +; ARMV7-NEXT: vmov d18, r0, r1 +; ARMV7-NEXT: vcmp.f64 d18, d16 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov d19, r2, r3 +; ARMV7-NEXT: vcmp.f64 d19, d17 +; ARMV7-NEXT: vmovlt.f64 d16, d18 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: vmovlt.f64 d17, d19 +; ARMV7-NEXT: vmov r2, r3, d17 +; ARMV7-NEXT: bx
lr +; +; ARMV8-LABEL: fminnumv264_nsz_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vldr d16, [sp, #8] +; ARMV8-NEXT: vmov d18, r2, r3 +; ARMV8-NEXT: vldr d17, [sp] +; ARMV8-NEXT: vmov d19, r0, r1 +; ARMV8-NEXT: vminnm.f64 d16, d18, d16 +; ARMV8-NEXT: vminnm.f64 d17, d19, d17 +; ARMV8-NEXT: vmov r2, r3, d16 +; ARMV8-NEXT: vmov r0, r1, d17 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fminnumv264_nsz_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: mov r12, sp +; ARMV8M-NEXT: vmov d0, r0, r1 +; ARMV8M-NEXT: vldrw.u32 q1, [r12] +; ARMV8M-NEXT: vmov d1, r2, r3 +; ARMV8M-NEXT: vcmp.f64 d2, d0 +; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8M-NEXT: vcmp.f64 d3, d1 +; ARMV8M-NEXT: vselgt.f64 d0, d0, d2 +; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: vselgt.f64 d1, d1, d3 +; ARMV8M-NEXT: vmov r2, r3, d1 +; ARMV8M-NEXT: bx lr + %a = call nnan nsz <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> %y) + ret <2 x double> %a +} + +define <2 x double> @fminnumv264_non_zero_intrinsic(<2 x double> %x) { +; ARMV7-LABEL: fminnumv264_non_zero_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov.f64 d16, #1.000000e+00 +; ARMV7-NEXT: vmov d17, r0, r1 +; ARMV7-NEXT: vcmp.f64 d17, d16 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov d18, r2, r3 +; ARMV7-NEXT: vcmp.f64 d18, d16 +; ARMV7-NEXT: vmov.f64 d19, d16 +; ARMV7-NEXT: vmovlt.f64 d19, d17 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov r0, r1, d19 +; ARMV7-NEXT: vmovlt.f64 d16, d18 +; ARMV7-NEXT: vmov r2, r3, d16 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fminnumv264_non_zero_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov.f64 d16, #1.000000e+00 +; ARMV8-NEXT: vmov d18, r0, r1 +; ARMV8-NEXT: vmov d17, r2, r3 +; ARMV8-NEXT: vminnm.f64 d18, d18, d16 +; ARMV8-NEXT: vminnm.f64 d16, d17, d16 +; ARMV8-NEXT: vmov r0, r1, d18 +; ARMV8-NEXT: vmov r2, r3, d16 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fminnumv264_non_zero_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov d1, r0, r1 +; ARMV8M-NEXT: vmov.f64 d0, #1.000000e+00 +; ARMV8M-NEXT: vcmp.f64 d0, d1 +; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8M-NEXT: vmov d2, r2, r3 +; ARMV8M-NEXT: vcmp.f64 d0, d2 +; ARMV8M-NEXT: vselgt.f64 d1, d1, d0 +; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8M-NEXT: vmov r0, r1, d1 +; ARMV8M-NEXT: vselgt.f64 d0, d2, d0 +; ARMV8M-NEXT: vmov r2, r3, d0 +; ARMV8M-NEXT: bx lr + %a = call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> <double 1.0, double 1.0>) + ret <2 x double> %a +} + +define <2 x double> @fminnumv264_one_zero_intrinsic(<2 x double> %x) { +; ARMV7-LABEL: fminnumv264_one_zero_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov d18, r2, r3 +; ARMV7-NEXT: vcmp.f64 d18, #0 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov d19, r0, r1 +; ARMV7-NEXT: vmov.f64 d16, #-1.000000e+00 +; ARMV7-NEXT: vcmp.f64 d19, d16 +; ARMV7-NEXT: vmov.i32 d17, #0x0 +; ARMV7-NEXT: vmovlt.f64 d17, d18 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov r2, r3, d17 +; ARMV7-NEXT: vmovlt.f64 d16, d19 +; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fminnumv264_one_zero_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov.f64 d16, #-1.000000e+00 +; ARMV8-NEXT: vldr d17, .LCPI27_0 +; ARMV8-NEXT: vmov d18, r0, r1 +; ARMV8-NEXT: vmov d19, r2, r3 +; ARMV8-NEXT: vminnm.f64 d16, d18, d16 +; ARMV8-NEXT: vminnm.f64 d17, d19, d17 +; ARMV8-NEXT: vmov r0, r1, d16 +; ARMV8-NEXT: vmov r2, r3, d17 +; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: .p2align 3 +; ARMV8-NEXT: @ %bb.1: +; ARMV8-NEXT: .LCPI27_0: +; ARMV8-NEXT: .long 0 @ double 0
+; ARMV8-NEXT: .long 0 +; +; ARMV8M-LABEL: fminnumv264_one_zero_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov d3, r2, r3 +; ARMV8M-NEXT: vldr d1, .LCPI27_0 +; ARMV8M-NEXT: vcmp.f64 d3, #0 +; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8M-NEXT: vmov d2, r0, r1 +; ARMV8M-NEXT: vmov.f64 d0, #-1.000000e+00 +; ARMV8M-NEXT: vcmp.f64 d0, d2 +; ARMV8M-NEXT: vmovlt.f64 d1, d3 +; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8M-NEXT: vmov r2, r3, d1 +; ARMV8M-NEXT: vselgt.f64 d0, d2, d0 +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: bx lr +; ARMV8M-NEXT: .p2align 3 +; ARMV8M-NEXT: @ %bb.1: +; ARMV8M-NEXT: .LCPI27_0: +; ARMV8M-NEXT: .long 0 @ double 0 +; ARMV8M-NEXT: .long 0 + %a = call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> <double -1.0, double 0.0>) + ret <2 x double> %a +} + +define <2 x double> @fmaxnumv264_intrinsic(<2 x double> %x, <2 x double> %y) { +; ARMV7-LABEL: fmaxnumv264_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: mov r12, sp +; ARMV7-NEXT: vld1.64 {d16, d17}, [r12] +; ARMV7-NEXT: vmov d18, r0, r1 +; ARMV7-NEXT: vcmp.f64 d18, d16 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov d19, r2, r3 +; ARMV7-NEXT: vcmp.f64 d19, d17 +; ARMV7-NEXT: vmovgt.f64 d16, d18 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: vmovgt.f64 d17, d19 +; ARMV7-NEXT: vmov r2, r3, d17 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fmaxnumv264_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vldr d16, [sp, #8] +; ARMV8-NEXT: vmov d18, r2, r3 +; ARMV8-NEXT: vldr d17, [sp] +; ARMV8-NEXT: vmov d19, r0, r1 +; ARMV8-NEXT: vmaxnm.f64 d16, d18, d16 +; ARMV8-NEXT: vmaxnm.f64 d17, d19, d17 +; ARMV8-NEXT: vmov r2, r3, d16 +; ARMV8-NEXT: vmov r0, r1, d17 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fmaxnumv264_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: mov r12, sp +; ARMV8M-NEXT: vmov d1, r0, r1 +; ARMV8M-NEXT: vldrw.u32 q1, [r12] +; ARMV8M-NEXT: vmov d0, r2, r3 +; ARMV8M-NEXT: vcmp.f64 d1, d2 +; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8M-NEXT: vcmp.f64 d0, d3 +; ARMV8M-NEXT: vselgt.f64 d1, d1, d2 +; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8M-NEXT: vmov r0, r1, d1 +; ARMV8M-NEXT: vselgt.f64 d0, d0, d3 +; ARMV8M-NEXT: vmov r2, r3, d0 +; ARMV8M-NEXT: bx lr + %a = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> %y) + ret <2 x double> %a +} + +define <2 x double> @fmaxnumv264_nsz_intrinsic(<2 x double> %x, <2 x double> %y) { +; ARMV7-LABEL: fmaxnumv264_nsz_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: mov r12, sp +; ARMV7-NEXT: vld1.64 {d16, d17}, [r12] +; ARMV7-NEXT: vmov d18, r0, r1 +; ARMV7-NEXT: vcmp.f64 d18, d16 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov d19, r2, r3 +; ARMV7-NEXT: vcmp.f64 d19, d17 +; ARMV7-NEXT: vmovgt.f64 d16, d18 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: vmovgt.f64 d17, d19 +; ARMV7-NEXT: vmov r2, r3, d17 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fmaxnumv264_nsz_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vldr d16, [sp, #8] +; ARMV8-NEXT: vmov d18, r2, r3 +; ARMV8-NEXT: vldr d17, [sp] +; ARMV8-NEXT: vmov d19, r0, r1 +; ARMV8-NEXT: vmaxnm.f64 d16, d18, d16 +; ARMV8-NEXT: vmaxnm.f64 d17, d19, d17 +; ARMV8-NEXT: vmov r2, r3, d16 +; ARMV8-NEXT: vmov r0, r1, d17 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fmaxnumv264_nsz_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: mov r12, sp +; ARMV8M-NEXT: vmov d1, r0, r1 +; ARMV8M-NEXT: vldrw.u32 q1, [r12] +; ARMV8M-NEXT: vmov d0, r2, r3 +; ARMV8M-NEXT: vcmp.f64 d1, d2 +; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8M-NEXT: vcmp.f64 d0, d3 +;
ARMV8M-NEXT: vselgt.f64 d1, d1, d2 +; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8M-NEXT: vmov r0, r1, d1 +; ARMV8M-NEXT: vselgt.f64 d0, d0, d3 +; ARMV8M-NEXT: vmov r2, r3, d0 +; ARMV8M-NEXT: bx lr + %a = call nnan nsz <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> %y) + ret <2 x double> %a +} + +define <2 x double> @fmaxnumv264_zero_intrinsic(<2 x double> %x) { +; ARMV7-LABEL: fmaxnumv264_zero_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vldr d17, .LCPI30_0 +; ARMV7-NEXT: vmov d18, r2, r3 +; ARMV7-NEXT: vmov d19, r0, r1 +; ARMV7-NEXT: vcmp.f64 d18, d17 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov.i32 d16, #0x0 +; ARMV7-NEXT: vcmp.f64 d19, #0 +; ARMV7-NEXT: vmovgt.f64 d17, d18 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov r2, r3, d17 +; ARMV7-NEXT: vmovgt.f64 d16, d19 +; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: bx lr +; ARMV7-NEXT: .p2align 3 +; ARMV7-NEXT: @ %bb.1: +; ARMV7-NEXT: .LCPI30_0: +; ARMV7-NEXT: .long 0 @ double -0 +; ARMV7-NEXT: .long 2147483648 +; +; ARMV8-LABEL: fmaxnumv264_zero_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vldr d16, .LCPI30_0 +; ARMV8-NEXT: vmov d18, r2, r3 +; ARMV8-NEXT: vldr d17, .LCPI30_1 +; ARMV8-NEXT: vmov d19, r0, r1 +; ARMV8-NEXT: vmaxnm.f64 d16, d18, d16 +; ARMV8-NEXT: vmaxnm.f64 d17, d19, d17 +; ARMV8-NEXT: vmov r2, r3, d16 +; ARMV8-NEXT: vmov r0, r1, d17 +; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: .p2align 3 +; ARMV8-NEXT: @ %bb.1: +; ARMV8-NEXT: .LCPI30_0: +; ARMV8-NEXT: .long 0 @ double -0 +; ARMV8-NEXT: .long 2147483648 +; ARMV8-NEXT: .LCPI30_1: +; ARMV8-NEXT: .long 0 @ double 0 +; ARMV8-NEXT: .long 0 +; +; ARMV8M-LABEL: fmaxnumv264_zero_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov d2, r0, r1 +; ARMV8M-NEXT: vldr d0, .LCPI30_0 +; ARMV8M-NEXT: vcmp.f64 d2, #0 +; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8M-NEXT: vmov d3, r2, r3 +; ARMV8M-NEXT: vcmp.f64 d3, d0 +; ARMV8M-NEXT: vldr d1, .LCPI30_1 +; ARMV8M-NEXT: vselgt.f64 d1, d2, d1 +; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8M-NEXT: vmov r0, r1, d1 +; ARMV8M-NEXT: vselgt.f64 d0, d3, d0 +; ARMV8M-NEXT: vmov r2, r3, d0 +; ARMV8M-NEXT: bx lr +; ARMV8M-NEXT: .p2align 3 +; ARMV8M-NEXT: @ %bb.1: +; ARMV8M-NEXT: .LCPI30_0: +; ARMV8M-NEXT: .long 0 @ double -0 +; ARMV8M-NEXT: .long 2147483648 +; ARMV8M-NEXT: .LCPI30_1: +; ARMV8M-NEXT: .long 0 @ double 0 +; ARMV8M-NEXT: .long 0 + %a = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> <double 0.0, double -0.0>) + ret <2 x double> %a +} + +define <2 x double> @fmaxnumv264_minus_zero_intrinsic(<2 x double> %x) { +; ARMV7-LABEL: fmaxnumv264_minus_zero_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vldr d16, .LCPI31_0 +; ARMV7-NEXT: vmov d17, r0, r1 +; ARMV7-NEXT: vmov d18, r2, r3 +; ARMV7-NEXT: vcmp.f64 d17, d16 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vcmp.f64 d18, d16 +; ARMV7-NEXT: vmov.f64 d19, d16 +; ARMV7-NEXT: vmovgt.f64 d19, d17 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov r0, r1, d19 +; ARMV7-NEXT: vmovgt.f64 d16, d18 +; ARMV7-NEXT: vmov r2, r3, d16 +; ARMV7-NEXT: bx lr +; ARMV7-NEXT: .p2align 3 +; ARMV7-NEXT: @ %bb.1: +; ARMV7-NEXT: .LCPI31_0: +; ARMV7-NEXT: .long 0 @ double -0 +; ARMV7-NEXT: .long 2147483648 +; +; ARMV8-LABEL: fmaxnumv264_minus_zero_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vldr d16, .LCPI31_0 +; ARMV8-NEXT: vmov d18, r0, r1 +; ARMV8-NEXT: vmov d17, r2, r3 +; ARMV8-NEXT: vmaxnm.f64 d18, d18, d16 +; ARMV8-NEXT: vmaxnm.f64 d16, d17, d16 +; ARMV8-NEXT: vmov r0, r1, d18 +; ARMV8-NEXT: vmov r2, r3, d16 +; ARMV8-NEXT: mov pc, lr +; ARMV8-NEXT: .p2align 3 +; ARMV8-NEXT: @
%bb.1: +; ARMV8-NEXT: .LCPI31_0: +; ARMV8-NEXT: .long 0 @ double -0 +; ARMV8-NEXT: .long 2147483648 +; +; ARMV8M-LABEL: fmaxnumv264_minus_zero_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vldr d0, .LCPI31_0 +; ARMV8M-NEXT: vmov d1, r0, r1 +; ARMV8M-NEXT: vmov d2, r2, r3 +; ARMV8M-NEXT: vcmp.f64 d1, d0 +; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8M-NEXT: vcmp.f64 d2, d0 +; ARMV8M-NEXT: vselgt.f64 d1, d1, d0 +; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8M-NEXT: vmov r0, r1, d1 +; ARMV8M-NEXT: vselgt.f64 d0, d2, d0 +; ARMV8M-NEXT: vmov r2, r3, d0 +; ARMV8M-NEXT: bx lr +; ARMV8M-NEXT: .p2align 3 +; ARMV8M-NEXT: @ %bb.1: +; ARMV8M-NEXT: .LCPI31_0: +; ARMV8M-NEXT: .long 0 @ double -0 +; ARMV8M-NEXT: .long 2147483648 + %a = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> <double -0.0, double -0.0>) + ret <2 x double> %a +} + +define <2 x double> @fmaxnumv264_non_zero_intrinsic(<2 x double> %x) { +; ARMV7-LABEL: fmaxnumv264_non_zero_intrinsic: +; ARMV7: @ %bb.0: +; ARMV7-NEXT: vmov.f64 d16, #1.000000e+00 +; ARMV7-NEXT: vmov d17, r0, r1 +; ARMV7-NEXT: vcmp.f64 d17, d16 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov d18, r2, r3 +; ARMV7-NEXT: vcmp.f64 d18, d16 +; ARMV7-NEXT: vmov.f64 d19, d16 +; ARMV7-NEXT: vmovgt.f64 d19, d17 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov r0, r1, d19 +; ARMV7-NEXT: vmovgt.f64 d16, d18 +; ARMV7-NEXT: vmov r2, r3, d16 +; ARMV7-NEXT: bx lr +; +; ARMV8-LABEL: fmaxnumv264_non_zero_intrinsic: +; ARMV8: @ %bb.0: +; ARMV8-NEXT: vmov.f64 d16, #1.000000e+00 +; ARMV8-NEXT: vmov d18, r0, r1 +; ARMV8-NEXT: vmov d17, r2, r3 +; ARMV8-NEXT: vmaxnm.f64 d18, d18, d16 +; ARMV8-NEXT: vmaxnm.f64 d16, d17, d16 +; ARMV8-NEXT: vmov r0, r1, d18 +; ARMV8-NEXT: vmov r2, r3, d16 +; ARMV8-NEXT: mov pc, lr +; +; ARMV8M-LABEL: fmaxnumv264_non_zero_intrinsic: +; ARMV8M: @ %bb.0: +; ARMV8M-NEXT: vmov.f64 d0, #1.000000e+00 +; ARMV8M-NEXT: vmov d1, r0, r1 +; ARMV8M-NEXT: vcmp.f64 d1, d0 +; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8M-NEXT: vmov d2, r2, r3 +; ARMV8M-NEXT: vcmp.f64 d2, d0 +; ARMV8M-NEXT: vselgt.f64 d1, d1, d0 +; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr +; ARMV8M-NEXT: vmov r0, r1, d1 +; ARMV8M-NEXT: vselgt.f64 d0, d2, d0 +; ARMV8M-NEXT: vmov r2, r3, d0 +; ARMV8M-NEXT: bx lr + %a = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> <double 1.0, double 1.0>) + ret <2 x double> %a +} Index: llvm/test/CodeGen/WebAssembly/f32.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/f32.ll +++ llvm/test/CodeGen/WebAssembly/f32.ll @@ -217,11 +217,38 @@ ; CHECK-LABEL: fminnum32_intrinsic: ; CHECK: .functype fminnum32_intrinsic (f32, f32) -> (f32) ; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push5=, 0 +; CHECK-NEXT: local.get $push4=, 1 +; CHECK-NEXT: local.get $push3=, 0 +; CHECK-NEXT: local.get $push2=, 1 +; CHECK-NEXT: f32.lt $push0=, $pop3, $pop2 +; CHECK-NEXT: f32.select $push1=, $pop5, $pop4, $pop0 +; CHECK-NEXT: return $pop1 + %a = call nnan float @llvm.minnum.f32(float %x, float %y) + ret float %a +} + +define float @fminnum32_non_zero_intrinsic(float %x) { +; CHECK-LABEL: fminnum32_non_zero_intrinsic: +; CHECK: .functype fminnum32_non_zero_intrinsic (f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: f32.const $push0=, -0x1p0 +; CHECK-NEXT: f32.min $push1=, $pop2, $pop0 +; CHECK-NEXT: return $pop1 + %a = call nnan float @llvm.minnum.f32(float %x, float -1.0) + ret float %a +} + +define float @fminnum32_nsz_intrinsic(float %x, float %y) { +; CHECK-LABEL: fminnum32_nsz_intrinsic: +;
CHECK: .functype fminnum32_nsz_intrinsic (f32, f32) -> (f32) +; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get $push2=, 0 ; CHECK-NEXT: local.get $push1=, 1 ; CHECK-NEXT: f32.min $push0=, $pop2, $pop1 ; CHECK-NEXT: return $pop0 - %a = call nnan float @llvm.minnum.f32(float %x, float %y) + %a = call nnan nsz float @llvm.minnum.f32(float %x, float %y) ret float %a } @@ -243,11 +270,53 @@ ; CHECK-LABEL: fmaxnum32_intrinsic: ; CHECK: .functype fmaxnum32_intrinsic (f32, f32) -> (f32) ; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push5=, 0 +; CHECK-NEXT: local.get $push4=, 1 +; CHECK-NEXT: local.get $push3=, 0 +; CHECK-NEXT: local.get $push2=, 1 +; CHECK-NEXT: f32.gt $push0=, $pop3, $pop2 +; CHECK-NEXT: f32.select $push1=, $pop5, $pop4, $pop0 +; CHECK-NEXT: return $pop1 + %a = call nnan float @llvm.maxnum.f32(float %x, float %y) + ret float %a +} + +define float @fmaxnum32_nsz_intrinsic(float %x, float %y) { +; CHECK-LABEL: fmaxnum32_nsz_intrinsic: +; CHECK: .functype fmaxnum32_nsz_intrinsic (f32, f32) -> (f32) +; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get $push2=, 0 ; CHECK-NEXT: local.get $push1=, 1 ; CHECK-NEXT: f32.max $push0=, $pop2, $pop1 ; CHECK-NEXT: return $pop0 - %a = call nnan float @llvm.maxnum.f32(float %x, float %y) + %a = call nnan nsz float @llvm.maxnum.f32(float %x, float %y) + ret float %a +} + +define float @fmaxnum32_zero_intrinsic(float %x) { +; CHECK-LABEL: fmaxnum32_zero_intrinsic: +; CHECK: .functype fmaxnum32_zero_intrinsic (f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push5=, 0 +; CHECK-NEXT: f32.const $push0=, 0x0p0 +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: f32.const $push3=, 0x0p0 +; CHECK-NEXT: f32.gt $push1=, $pop4, $pop3 +; CHECK-NEXT: f32.select $push2=, $pop5, $pop0, $pop1 +; CHECK-NEXT: return $pop2 + %a = call nnan float @llvm.maxnum.f32(float %x, float 0.0) + ret float %a +} + +define float @fmaxnum32_non_zero_intrinsic(float %x) { +; CHECK-LABEL: fmaxnum32_non_zero_intrinsic: +; CHECK: .functype fmaxnum32_non_zero_intrinsic (f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: f32.const $push0=, 0x1p0 +; CHECK-NEXT: f32.max $push1=, $pop2, $pop0 +; CHECK-NEXT: return $pop1 + %a = call nnan float @llvm.maxnum.f32(float %x, float 1.0) ret float %a } Index: llvm/test/CodeGen/WebAssembly/f64.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/f64.ll +++ llvm/test/CodeGen/WebAssembly/f64.ll @@ -212,6 +212,61 @@ ret double %a } +declare double @llvm.minnum.f64(double, double) +define double @fminnum64_intrinsic(double %x, double %y) { +; CHECK-LABEL: fminnum64_intrinsic: +; CHECK: .functype fminnum64_intrinsic (f64, f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push5=, 0 +; CHECK-NEXT: local.get $push4=, 1 +; CHECK-NEXT: local.get $push3=, 0 +; CHECK-NEXT: local.get $push2=, 1 +; CHECK-NEXT: f64.lt $push0=, $pop3, $pop2 +; CHECK-NEXT: f64.select $push1=, $pop5, $pop4, $pop0 +; CHECK-NEXT: return $pop1 + %a = call nnan double @llvm.minnum.f64(double %x, double %y) + ret double %a +} + +define double @fminnum64_nsz_intrinsic(double %x, double %y) { +; CHECK-LABEL: fminnum64_nsz_intrinsic: +; CHECK: .functype fminnum64_nsz_intrinsic (f64, f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.min $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 + %a = call nnan nsz double @llvm.minnum.f64(double %x, double %y) + ret double %a +} + +define double 
@fminnum64_zero_intrinsic(double %x) {
+; CHECK-LABEL: fminnum64_zero_intrinsic:
+; CHECK: .functype fminnum64_zero_intrinsic (f64) -> (f64)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push5=, 0
+; CHECK-NEXT: f64.const $push0=, -0x0p0
+; CHECK-NEXT: local.get $push4=, 0
+; CHECK-NEXT: f64.const $push3=, -0x0p0
+; CHECK-NEXT: f64.lt $push1=, $pop4, $pop3
+; CHECK-NEXT: f64.select $push2=, $pop5, $pop0, $pop1
+; CHECK-NEXT: return $pop2
+ %a = call nnan double @llvm.minnum.f64(double %x, double -0.0)
+ ret double %a
+}
+
+define double @fminnum64_non_zero_intrinsic(double %x) {
+; CHECK-LABEL: fminnum64_non_zero_intrinsic:
+; CHECK: .functype fminnum64_non_zero_intrinsic (f64) -> (f64)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push2=, 0
+; CHECK-NEXT: f64.const $push0=, -0x1p0
+; CHECK-NEXT: f64.min $push1=, $pop2, $pop0
+; CHECK-NEXT: return $pop1
+ %a = call nnan double @llvm.minnum.f64(double %x, double -1.0)
+ ret double %a
+}
+
 declare double @llvm.maximum.f64(double, double)
 define double @fmax64_intrinsic(double %x, double %y) {
 ; CHECK-LABEL: fmax64_intrinsic:
@@ -225,6 +280,61 @@
 ret double %a
 }
 
+declare double @llvm.maxnum.f64(double, double)
+define double @fmaxnum64_intrinsic(double %x, double %y) {
+; CHECK-LABEL: fmaxnum64_intrinsic:
+; CHECK: .functype fmaxnum64_intrinsic (f64, f64) -> (f64)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push5=, 0
+; CHECK-NEXT: local.get $push4=, 1
+; CHECK-NEXT: local.get $push3=, 0
+; CHECK-NEXT: local.get $push2=, 1
+; CHECK-NEXT: f64.gt $push0=, $pop3, $pop2
+; CHECK-NEXT: f64.select $push1=, $pop5, $pop4, $pop0
+; CHECK-NEXT: return $pop1
+ %a = call nnan double @llvm.maxnum.f64(double %x, double %y)
+ ret double %a
+}
+
+define double @fmaxnum64_nsz_intrinsic(double %x, double %y) {
+; CHECK-LABEL: fmaxnum64_nsz_intrinsic:
+; CHECK: .functype fmaxnum64_nsz_intrinsic (f64, f64) -> (f64)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push2=, 0
+; CHECK-NEXT: local.get $push1=, 1
+; CHECK-NEXT: f64.max $push0=, $pop2, $pop1
+; CHECK-NEXT: return $pop0
+ %a = call nnan nsz double @llvm.maxnum.f64(double %x, double %y)
+ ret double %a
+}
+
+define double @fmaxnum64_zero_intrinsic(double %x) {
+; CHECK-LABEL: fmaxnum64_zero_intrinsic:
+; CHECK: .functype fmaxnum64_zero_intrinsic (f64) -> (f64)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push5=, 0
+; CHECK-NEXT: f64.const $push0=, 0x0p0
+; CHECK-NEXT: local.get $push4=, 0
+; CHECK-NEXT: f64.const $push3=, 0x0p0
+; CHECK-NEXT: f64.gt $push1=, $pop4, $pop3
+; CHECK-NEXT: f64.select $push2=, $pop5, $pop0, $pop1
+; CHECK-NEXT: return $pop2
+ %a = call nnan double @llvm.maxnum.f64(double %x, double 0.0)
+ ret double %a
+}
+
+define double @fmaxnum64_non_zero_intrinsic(double %x) {
+; CHECK-LABEL: fmaxnum64_non_zero_intrinsic:
+; CHECK: .functype fmaxnum64_non_zero_intrinsic (f64) -> (f64)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push2=, 0
+; CHECK-NEXT: f64.const $push0=, 0x1p0
+; CHECK-NEXT: f64.max $push1=, $pop2, $pop0
+; CHECK-NEXT: return $pop1
+ %a = call nnan double @llvm.maxnum.f64(double %x, double 1.0)
+ ret double %a
+}
+
 define double @fma64(double %a, double %b, double %c) {
 ; CHECK-LABEL: fma64:
 ; CHECK: .functype fma64 (f64, f64, f64) -> (f64)
Index: llvm/test/CodeGen/WebAssembly/simd-arith.ll
===================================================================
--- llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -13498,13 +13498,67 @@
 ; SIMD128-LABEL: minnum_intrinsic_v4f32:
 ; SIMD128: .functype minnum_intrinsic_v4f32 (v128, v128) -> (v128)
 ; SIMD128-NEXT: # %bb.0:
-; SIMD128-NEXT: f32x4.min $push0=, $0, $1
-; SIMD128-NEXT: return $pop0
+; SIMD128-NEXT: f32x4.extract_lane $push27=, $0, 0
+; SIMD128-NEXT: local.tee $push26=, $3=, $pop27
+; SIMD128-NEXT: f32x4.extract_lane $push25=, $1, 0
+; SIMD128-NEXT: local.tee $push24=, $2=, $pop25
+; SIMD128-NEXT: f32.lt $push2=, $3, $2
+; SIMD128-NEXT: f32.select $push3=, $pop26, $pop24, $pop2
+; SIMD128-NEXT: f32x4.splat $push4=, $pop3
+; SIMD128-NEXT: f32x4.extract_lane $push23=, $0, 1
+; SIMD128-NEXT: local.tee $push22=, $3=, $pop23
+; SIMD128-NEXT: f32x4.extract_lane $push21=, $1, 1
+; SIMD128-NEXT: local.tee $push20=, $2=, $pop21
+; SIMD128-NEXT: f32.lt $push0=, $3, $2
+; SIMD128-NEXT: f32.select $push1=, $pop22, $pop20, $pop0
+; SIMD128-NEXT: f32x4.replace_lane $push5=, $pop4, 1, $pop1
+; SIMD128-NEXT: f32x4.extract_lane $push19=, $0, 2
+; SIMD128-NEXT: local.tee $push18=, $3=, $pop19
+; SIMD128-NEXT: f32x4.extract_lane $push17=, $1, 2
+; SIMD128-NEXT: local.tee $push16=, $2=, $pop17
+; SIMD128-NEXT: f32.lt $push6=, $3, $2
+; SIMD128-NEXT: f32.select $push7=, $pop18, $pop16, $pop6
+; SIMD128-NEXT: f32x4.replace_lane $push8=, $pop5, 2, $pop7
+; SIMD128-NEXT: f32x4.extract_lane $push15=, $0, 3
+; SIMD128-NEXT: local.tee $push14=, $3=, $pop15
+; SIMD128-NEXT: f32x4.extract_lane $push13=, $1, 3
+; SIMD128-NEXT: local.tee $push12=, $2=, $pop13
+; SIMD128-NEXT: f32.lt $push9=, $3, $2
+; SIMD128-NEXT: f32.select $push10=, $pop14, $pop12, $pop9
+; SIMD128-NEXT: f32x4.replace_lane $push11=, $pop8, 3, $pop10
+; SIMD128-NEXT: return $pop11
 ;
 ; SIMD128-FAST-LABEL: minnum_intrinsic_v4f32:
 ; SIMD128-FAST: .functype minnum_intrinsic_v4f32 (v128, v128) -> (v128)
 ; SIMD128-FAST-NEXT: # %bb.0:
-; SIMD128-FAST-NEXT: f32x4.min $push0=, $0, $1
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push27=, $0, 0
+; SIMD128-FAST-NEXT: local.tee $push26=, $3=, $pop27
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push25=, $1, 0
+; SIMD128-FAST-NEXT: local.tee $push24=, $2=, $pop25
+; SIMD128-FAST-NEXT: f32.lt $push3=, $3, $2
+; SIMD128-FAST-NEXT: f32.select $push4=, $pop26, $pop24, $pop3
+; SIMD128-FAST-NEXT: f32x4.splat $push5=, $pop4
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push23=, $0, 1
+; SIMD128-FAST-NEXT: local.tee $push22=, $3=, $pop23
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push21=, $1, 1
+; SIMD128-FAST-NEXT: local.tee $push20=, $2=, $pop21
+; SIMD128-FAST-NEXT: f32.lt $push1=, $3, $2
+; SIMD128-FAST-NEXT: f32.select $push2=, $pop22, $pop20, $pop1
+; SIMD128-FAST-NEXT: f32x4.replace_lane $push6=, $pop5, 1, $pop2
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push19=, $0, 2
+; SIMD128-FAST-NEXT: local.tee $push18=, $3=, $pop19
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push17=, $1, 2
+; SIMD128-FAST-NEXT: local.tee $push16=, $2=, $pop17
+; SIMD128-FAST-NEXT: f32.lt $push7=, $3, $2
+; SIMD128-FAST-NEXT: f32.select $push8=, $pop18, $pop16, $pop7
+; SIMD128-FAST-NEXT: f32x4.replace_lane $push9=, $pop6, 2, $pop8
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push15=, $0, 3
+; SIMD128-FAST-NEXT: local.tee $push14=, $3=, $pop15
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push13=, $1, 3
+; SIMD128-FAST-NEXT: local.tee $push12=, $2=, $pop13
+; SIMD128-FAST-NEXT: f32.lt $push10=, $3, $2
+; SIMD128-FAST-NEXT: f32.select $push11=, $pop14, $pop12, $pop10
+; SIMD128-FAST-NEXT: f32x4.replace_lane $push0=, $pop9, 3, $pop11
 ; SIMD128-FAST-NEXT: return $pop0
 ;
 ; NO-SIMD128-LABEL: minnum_intrinsic_v4f32:
@@ -13540,6 +13594,216 @@
 ret <4 x float> %a
 }
 
+define <4 x float> @minnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: minnum_nsz_intrinsic_v4f32:
+; SIMD128: .functype minnum_nsz_intrinsic_v4f32 (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.min $push0=, $0, $1
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: minnum_nsz_intrinsic_v4f32:
+; SIMD128-FAST: .functype minnum_nsz_intrinsic_v4f32 (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.min $push0=, $0, $1
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: minnum_nsz_intrinsic_v4f32:
+; NO-SIMD128: .functype minnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: call $push0=, fminf, $3, $7
+; NO-SIMD128-NEXT: f32.store 8($0), $pop0
+; NO-SIMD128-NEXT: call $push1=, fminf, $2, $6
+; NO-SIMD128-NEXT: f32.store 4($0), $pop1
+; NO-SIMD128-NEXT: call $push2=, fminf, $1, $5
+; NO-SIMD128-NEXT: f32.store 0($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push3=, 12
+; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3
+; NO-SIMD128-NEXT: call $push5=, fminf, $4, $8
+; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: minnum_nsz_intrinsic_v4f32:
+; NO-SIMD128-FAST: .functype minnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: call $push0=, fminf, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0
+; NO-SIMD128-FAST-NEXT: call $push1=, fminf, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
+; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
+; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
+; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
+; NO-SIMD128-FAST-NEXT: call $push5=, fminf, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: return
+ %a = call nnan nsz <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y)
+ ret <4 x float> %a
+}
+
+define <4 x float> @fminnumv432_non_zero_intrinsic(<4 x float> %x) {
+; SIMD128-LABEL: fminnumv432_non_zero_intrinsic:
+; SIMD128: .functype fminnumv432_non_zero_intrinsic (v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: v128.const $push0=, -0x1p0, -0x1p0, -0x1p0, -0x1p0
+; SIMD128-NEXT: f32x4.min $push1=, $0, $pop0
+; SIMD128-NEXT: return $pop1
+;
+; SIMD128-FAST-LABEL: fminnumv432_non_zero_intrinsic:
+; SIMD128-FAST: .functype fminnumv432_non_zero_intrinsic (v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: v128.const $push1=, -0x1p0, -0x1p0, -0x1p0, -0x1p0
+; SIMD128-FAST-NEXT: f32x4.min $push0=, $0, $pop1
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: fminnumv432_non_zero_intrinsic:
+; NO-SIMD128: .functype fminnumv432_non_zero_intrinsic (i32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0
+; NO-SIMD128-NEXT: call $push1=, fminf, $3, $pop0
+; NO-SIMD128-NEXT: f32.store 8($0), $pop1
+; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0
+; NO-SIMD128-NEXT: call $push2=, fminf, $2, $pop9
+; NO-SIMD128-NEXT: f32.store 4($0), $pop2
+; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0
+; NO-SIMD128-NEXT: call $push3=, fminf, $1, $pop8
+; NO-SIMD128-NEXT: f32.store 0($0), $pop3
+; NO-SIMD128-NEXT: i32.const $push4=, 12
+; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
+; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0
+; NO-SIMD128-NEXT: call $push6=, fminf, $4, $pop7
+; NO-SIMD128-NEXT: f32.store 0($pop5), $pop6
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: fminnumv432_non_zero_intrinsic:
+; NO-SIMD128-FAST: .functype fminnumv432_non_zero_intrinsic (i32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.const $push0=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push1=, fminf, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $2, $pop9
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2
+; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $3, $pop8
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12
+; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4
+; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push6=, fminf, $4, $pop7
+; NO-SIMD128-FAST-NEXT: f32.store 0($pop5), $pop6
+; NO-SIMD128-FAST-NEXT: return
+ %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>)
+ ret <4 x float> %a
+}
+
+define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) {
+; SIMD128-LABEL: fminnumv432_one_zero_intrinsic:
+; SIMD128: .functype fminnumv432_one_zero_intrinsic (v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.extract_lane $push27=, $0, 0
+; SIMD128-NEXT: local.tee $push26=, $1=, $pop27
+; SIMD128-NEXT: f32.const $push3=, -0x1p0
+; SIMD128-NEXT: f32.const $push25=, -0x1p0
+; SIMD128-NEXT: f32.lt $push4=, $1, $pop25
+; SIMD128-NEXT: f32.select $push5=, $pop26, $pop3, $pop4
+; SIMD128-NEXT: f32x4.splat $push6=, $pop5
+; SIMD128-NEXT: f32x4.extract_lane $push24=, $0, 1
+; SIMD128-NEXT: local.tee $push23=, $1=, $pop24
+; SIMD128-NEXT: f32.const $push0=, 0x0p0
+; SIMD128-NEXT: f32.const $push22=, 0x0p0
+; SIMD128-NEXT: f32.lt $push1=, $1, $pop22
+; SIMD128-NEXT: f32.select $push2=, $pop23, $pop0, $pop1
+; SIMD128-NEXT: f32x4.replace_lane $push7=, $pop6, 1, $pop2
+; SIMD128-NEXT: f32x4.extract_lane $push21=, $0, 2
+; SIMD128-NEXT: local.tee $push20=, $1=, $pop21
+; SIMD128-NEXT: f32.const $push19=, -0x1p0
+; SIMD128-NEXT: f32.const $push18=, -0x1p0
+; SIMD128-NEXT: f32.lt $push8=, $1, $pop18
+; SIMD128-NEXT: f32.select $push9=, $pop20, $pop19, $pop8
+; SIMD128-NEXT: f32x4.replace_lane $push10=, $pop7, 2, $pop9
+; SIMD128-NEXT: f32x4.extract_lane $push17=, $0, 3
+; SIMD128-NEXT: local.tee $push16=, $1=, $pop17
+; SIMD128-NEXT: f32.const $push15=, -0x1p0
+; SIMD128-NEXT: f32.const $push14=, -0x1p0
+; SIMD128-NEXT: f32.lt $push11=, $1, $pop14
+; SIMD128-NEXT: f32.select $push12=, $pop16, $pop15, $pop11
+; SIMD128-NEXT: f32x4.replace_lane $push13=, $pop10, 3, $pop12
+; SIMD128-NEXT: return $pop13
+;
+; SIMD128-FAST-LABEL: fminnumv432_one_zero_intrinsic:
+; SIMD128-FAST: .functype fminnumv432_one_zero_intrinsic (v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push27=, $0, 0
+; SIMD128-FAST-NEXT: local.tee $push26=, $1=, $pop27
+; SIMD128-FAST-NEXT: f32.const $push4=, -0x1p0
+; SIMD128-FAST-NEXT: f32.const $push25=, -0x1p0
+; SIMD128-FAST-NEXT: f32.lt $push5=, $1, $pop25
+; SIMD128-FAST-NEXT: f32.select $push6=, $pop26, $pop4, $pop5
+; SIMD128-FAST-NEXT: f32x4.splat $push7=, $pop6
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push24=, $0, 1
+; SIMD128-FAST-NEXT: local.tee $push23=, $1=, $pop24
+; SIMD128-FAST-NEXT: f32.const $push1=, 0x0p0
+; SIMD128-FAST-NEXT: f32.const $push22=, 0x0p0
+; SIMD128-FAST-NEXT: f32.lt $push2=, $1, $pop22
+; SIMD128-FAST-NEXT: f32.select $push3=, $pop23, $pop1, $pop2
+; SIMD128-FAST-NEXT: f32x4.replace_lane $push8=, $pop7, 1, $pop3
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push21=, $0, 2
+; SIMD128-FAST-NEXT: local.tee $push20=, $1=, $pop21
+; SIMD128-FAST-NEXT: f32.const $push19=, -0x1p0
+; SIMD128-FAST-NEXT: f32.const $push18=, -0x1p0
+; SIMD128-FAST-NEXT: f32.lt $push9=, $1, $pop18
+; SIMD128-FAST-NEXT: f32.select $push10=, $pop20, $pop19, $pop9
+; SIMD128-FAST-NEXT: f32x4.replace_lane $push11=, $pop8, 2, $pop10
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push17=, $0, 3
+; SIMD128-FAST-NEXT: local.tee $push16=, $1=, $pop17
+; SIMD128-FAST-NEXT: f32.const $push15=, -0x1p0
+; SIMD128-FAST-NEXT: f32.const $push14=, -0x1p0
+; SIMD128-FAST-NEXT: f32.lt $push12=, $1, $pop14
+; SIMD128-FAST-NEXT: f32.select $push13=, $pop16, $pop15, $pop12
+; SIMD128-FAST-NEXT: f32x4.replace_lane $push0=, $pop11, 3, $pop13
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: fminnumv432_one_zero_intrinsic:
+; NO-SIMD128: .functype fminnumv432_one_zero_intrinsic (i32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0
+; NO-SIMD128-NEXT: call $push1=, fminf, $3, $pop0
+; NO-SIMD128-NEXT: f32.store 8($0), $pop1
+; NO-SIMD128-NEXT: f32.const $push2=, 0x0p0
+; NO-SIMD128-NEXT: call $push3=, fminf, $2, $pop2
+; NO-SIMD128-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0
+; NO-SIMD128-NEXT: call $push4=, fminf, $1, $pop9
+; NO-SIMD128-NEXT: f32.store 0($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push5=, 12
+; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
+; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0
+; NO-SIMD128-NEXT: call $push7=, fminf, $4, $pop8
+; NO-SIMD128-NEXT: f32.store 0($pop6), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: fminnumv432_one_zero_intrinsic:
+; NO-SIMD128-FAST: .functype fminnumv432_one_zero_intrinsic (i32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.const $push0=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push1=, fminf, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x0p0
+; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $2, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push4=, fminf, $3, $pop9
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.const $push5=, 12
+; NO-SIMD128-FAST-NEXT: i32.add $push6=, $0, $pop5
+; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push7=, fminf, $4, $pop8
+; NO-SIMD128-FAST-NEXT: f32.store 0($pop6), $pop7
+; NO-SIMD128-FAST-NEXT: return
+ %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> <float -1.0, float 0.0, float -1.0, float -1.0>)
+ ret <4 x float> %a
+}
+
 declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
 define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
 ; SIMD128-LABEL: max_intrinsic_v4f32:
@@ -13592,13 +13856,67 @@
 ; SIMD128-LABEL: maxnum_intrinsic_v4f32:
 ; SIMD128: .functype maxnum_intrinsic_v4f32 (v128, v128) -> (v128)
 ; SIMD128-NEXT: # %bb.0:
-; SIMD128-NEXT: f32x4.max $push0=, $0, $1
-; SIMD128-NEXT: return $pop0
+; SIMD128-NEXT: f32x4.extract_lane $push27=, $0, 0
+; SIMD128-NEXT: local.tee $push26=, $3=, $pop27
+; SIMD128-NEXT: f32x4.extract_lane $push25=, $1, 0
+; SIMD128-NEXT: local.tee $push24=, $2=, $pop25
+; SIMD128-NEXT: f32.gt $push2=, $3, $2
+; SIMD128-NEXT: f32.select $push3=, $pop26, $pop24, $pop2
+; SIMD128-NEXT: f32x4.splat $push4=, $pop3
+; SIMD128-NEXT: f32x4.extract_lane $push23=, $0, 1
+; SIMD128-NEXT: local.tee $push22=, $3=, $pop23
+; SIMD128-NEXT: f32x4.extract_lane $push21=, $1, 1
+; SIMD128-NEXT: local.tee $push20=, $2=, $pop21
+; SIMD128-NEXT: f32.gt $push0=, $3, $2
+; SIMD128-NEXT: f32.select $push1=, $pop22, $pop20, $pop0
+; SIMD128-NEXT: f32x4.replace_lane $push5=, $pop4, 1, $pop1
+; SIMD128-NEXT: f32x4.extract_lane $push19=, $0, 2
+; SIMD128-NEXT: local.tee $push18=, $3=, $pop19
+; SIMD128-NEXT: f32x4.extract_lane $push17=, $1, 2
+; SIMD128-NEXT: local.tee $push16=, $2=, $pop17
+; SIMD128-NEXT: f32.gt $push6=, $3, $2
+; SIMD128-NEXT: f32.select $push7=, $pop18, $pop16, $pop6
+; SIMD128-NEXT: f32x4.replace_lane $push8=, $pop5, 2, $pop7
+; SIMD128-NEXT: f32x4.extract_lane $push15=, $0, 3
+; SIMD128-NEXT: local.tee $push14=, $3=, $pop15
+; SIMD128-NEXT: f32x4.extract_lane $push13=, $1, 3
+; SIMD128-NEXT: local.tee $push12=, $2=, $pop13
+; SIMD128-NEXT: f32.gt $push9=, $3, $2
+; SIMD128-NEXT: f32.select $push10=, $pop14, $pop12, $pop9
+; SIMD128-NEXT: f32x4.replace_lane $push11=, $pop8, 3, $pop10
+; SIMD128-NEXT: return $pop11
 ;
 ; SIMD128-FAST-LABEL: maxnum_intrinsic_v4f32:
 ; SIMD128-FAST: .functype maxnum_intrinsic_v4f32 (v128, v128) -> (v128)
 ; SIMD128-FAST-NEXT: # %bb.0:
-; SIMD128-FAST-NEXT: f32x4.max $push0=, $0, $1
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push27=, $0, 0
+; SIMD128-FAST-NEXT: local.tee $push26=, $3=, $pop27
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push25=, $1, 0
+; SIMD128-FAST-NEXT: local.tee $push24=, $2=, $pop25
+; SIMD128-FAST-NEXT: f32.gt $push3=, $3, $2
+; SIMD128-FAST-NEXT: f32.select $push4=, $pop26, $pop24, $pop3
+; SIMD128-FAST-NEXT: f32x4.splat $push5=, $pop4
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push23=, $0, 1
+; SIMD128-FAST-NEXT: local.tee $push22=, $3=, $pop23
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push21=, $1, 1
+; SIMD128-FAST-NEXT: local.tee $push20=, $2=, $pop21
+; SIMD128-FAST-NEXT: f32.gt $push1=, $3, $2
+; SIMD128-FAST-NEXT: f32.select $push2=, $pop22, $pop20, $pop1
+; SIMD128-FAST-NEXT: f32x4.replace_lane $push6=, $pop5, 1, $pop2
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push19=, $0, 2
+; SIMD128-FAST-NEXT: local.tee $push18=, $3=, $pop19
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push17=, $1, 2
+; SIMD128-FAST-NEXT: local.tee $push16=, $2=, $pop17
+; SIMD128-FAST-NEXT: f32.gt $push7=, $3, $2
+; SIMD128-FAST-NEXT: f32.select $push8=, $pop18, $pop16, $pop7
+; SIMD128-FAST-NEXT: f32x4.replace_lane $push9=, $pop6, 2, $pop8
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push15=, $0, 3
+; SIMD128-FAST-NEXT: local.tee $push14=, $3=, $pop15
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push13=, $1, 3
+; SIMD128-FAST-NEXT: local.tee $push12=, $2=, $pop13
+; SIMD128-FAST-NEXT: f32.gt $push10=, $3, $2
+; SIMD128-FAST-NEXT: f32.select $push11=, $pop14, $pop12, $pop10
+; SIMD128-FAST-NEXT: f32x4.replace_lane $push0=, $pop9, 3, $pop11
 ; SIMD128-FAST-NEXT: return $pop0
 ;
 ; NO-SIMD128-LABEL: maxnum_intrinsic_v4f32:
@@ -13634,6 +13952,216 @@
 ret <4 x float> %a
 }
 
+define <4 x float> @maxnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: maxnum_nsz_intrinsic_v4f32:
+; SIMD128: .functype maxnum_nsz_intrinsic_v4f32 (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.max $push0=, $0, $1
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: maxnum_nsz_intrinsic_v4f32:
+; SIMD128-FAST: .functype maxnum_nsz_intrinsic_v4f32 (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.max $push0=, $0, $1
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: maxnum_nsz_intrinsic_v4f32:
+; NO-SIMD128: .functype maxnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: call $push0=, fmaxf, $3, $7
+; NO-SIMD128-NEXT: f32.store 8($0), $pop0
+; NO-SIMD128-NEXT: call $push1=, fmaxf, $2, $6
+; NO-SIMD128-NEXT: f32.store 4($0), $pop1
+; NO-SIMD128-NEXT: call $push2=, fmaxf, $1, $5
+; NO-SIMD128-NEXT: f32.store 0($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push3=, 12
+; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3
+; NO-SIMD128-NEXT: call $push5=, fmaxf, $4, $8
+; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: maxnum_nsz_intrinsic_v4f32:
+; NO-SIMD128-FAST: .functype maxnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: call $push0=, fmaxf, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0
+; NO-SIMD128-FAST-NEXT: call $push1=, fmaxf, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
+; NO-SIMD128-FAST-NEXT: call $push2=, fmaxf, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
+; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
+; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
+; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: return
+ %a = call nnan nsz <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y)
+ ret <4 x float> %a
+}
+
+define <4 x float> @maxnum_one_zero_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: maxnum_one_zero_intrinsic_v4f32:
+; SIMD128: .functype maxnum_one_zero_intrinsic_v4f32 (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.extract_lane $push27=, $0, 0
+; SIMD128-NEXT: local.tee $push26=, $2=, $pop27
+; SIMD128-NEXT: f32.const $push3=, -0x1p0
+; SIMD128-NEXT: f32.const $push25=, -0x1p0
+; SIMD128-NEXT: f32.gt $push4=, $2, $pop25
+; SIMD128-NEXT: f32.select $push5=, $pop26, $pop3, $pop4
+; SIMD128-NEXT: f32x4.splat $push6=, $pop5
+; SIMD128-NEXT: f32x4.extract_lane $push24=, $0, 1
+; SIMD128-NEXT: local.tee $push23=, $2=, $pop24
+; SIMD128-NEXT: f32.const $push0=, 0x0p0
+; SIMD128-NEXT: f32.const $push22=, 0x0p0
+; SIMD128-NEXT: f32.gt $push1=, $2, $pop22
+; SIMD128-NEXT: f32.select $push2=, $pop23, $pop0, $pop1
+; SIMD128-NEXT: f32x4.replace_lane $push7=, $pop6, 1, $pop2
+; SIMD128-NEXT: f32x4.extract_lane $push21=, $0, 2
+; SIMD128-NEXT: local.tee $push20=, $2=, $pop21
+; SIMD128-NEXT: f32.const $push19=, -0x1p0
+; SIMD128-NEXT: f32.const $push18=, -0x1p0
+; SIMD128-NEXT: f32.gt $push8=, $2, $pop18
+; SIMD128-NEXT: f32.select $push9=, $pop20, $pop19, $pop8
+; SIMD128-NEXT: f32x4.replace_lane $push10=, $pop7, 2, $pop9
+; SIMD128-NEXT: f32x4.extract_lane $push17=, $0, 3
+; SIMD128-NEXT: local.tee $push16=, $2=, $pop17
+; SIMD128-NEXT: f32.const $push15=, -0x1p0
+; SIMD128-NEXT: f32.const $push14=, -0x1p0
+; SIMD128-NEXT: f32.gt $push11=, $2, $pop14
+; SIMD128-NEXT: f32.select $push12=, $pop16, $pop15, $pop11
+; SIMD128-NEXT: f32x4.replace_lane $push13=, $pop10, 3, $pop12
+; SIMD128-NEXT: return $pop13
+;
+; SIMD128-FAST-LABEL: maxnum_one_zero_intrinsic_v4f32:
+; SIMD128-FAST: .functype maxnum_one_zero_intrinsic_v4f32 (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push27=, $0, 0
+; SIMD128-FAST-NEXT: local.tee $push26=, $2=, $pop27
+; SIMD128-FAST-NEXT: f32.const $push4=, -0x1p0
+; SIMD128-FAST-NEXT: f32.const $push25=, -0x1p0
+; SIMD128-FAST-NEXT: f32.gt $push5=, $2, $pop25
+; SIMD128-FAST-NEXT: f32.select $push6=, $pop26, $pop4, $pop5
+; SIMD128-FAST-NEXT: f32x4.splat $push7=, $pop6
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push24=, $0, 1
+; SIMD128-FAST-NEXT: local.tee $push23=, $2=, $pop24
+; SIMD128-FAST-NEXT: f32.const $push1=, 0x0p0
+; SIMD128-FAST-NEXT: f32.const $push22=, 0x0p0
+; SIMD128-FAST-NEXT: f32.gt $push2=, $2, $pop22
+; SIMD128-FAST-NEXT: f32.select $push3=, $pop23, $pop1, $pop2
+; SIMD128-FAST-NEXT: f32x4.replace_lane $push8=, $pop7, 1, $pop3
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push21=, $0, 2
+; SIMD128-FAST-NEXT: local.tee $push20=, $2=, $pop21
+; SIMD128-FAST-NEXT: f32.const $push19=, -0x1p0
+; SIMD128-FAST-NEXT: f32.const $push18=, -0x1p0
+; SIMD128-FAST-NEXT: f32.gt $push9=, $2, $pop18
+; SIMD128-FAST-NEXT: f32.select $push10=, $pop20, $pop19, $pop9
+; SIMD128-FAST-NEXT: f32x4.replace_lane $push11=, $pop8, 2, $pop10
+; SIMD128-FAST-NEXT: f32x4.extract_lane $push17=, $0, 3
+; SIMD128-FAST-NEXT: local.tee $push16=, $2=, $pop17
+; SIMD128-FAST-NEXT: f32.const $push15=, -0x1p0
+; SIMD128-FAST-NEXT: f32.const $push14=, -0x1p0
+; SIMD128-FAST-NEXT: f32.gt $push12=, $2, $pop14
+; SIMD128-FAST-NEXT: f32.select $push13=, $pop16, $pop15, $pop12
+; SIMD128-FAST-NEXT: f32x4.replace_lane $push0=, $pop11, 3, $pop13
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: maxnum_one_zero_intrinsic_v4f32:
+; NO-SIMD128: .functype maxnum_one_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0
+; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $pop0
+; NO-SIMD128-NEXT: f32.store 8($0), $pop1
+; NO-SIMD128-NEXT: f32.const $push2=, 0x0p0
+; NO-SIMD128-NEXT: call $push3=, fmaxf, $2, $pop2
+; NO-SIMD128-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0
+; NO-SIMD128-NEXT: call $push4=, fmaxf, $1, $pop9
+; NO-SIMD128-NEXT: f32.store 0($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push5=, 12
+; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
+; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0
+; NO-SIMD128-NEXT: call $push7=, fmaxf, $4, $pop8
+; NO-SIMD128-NEXT: f32.store 0($pop6), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: maxnum_one_zero_intrinsic_v4f32:
+; NO-SIMD128-FAST: .functype maxnum_one_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.const $push0=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push1=, fmaxf, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x0p0
+; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $2, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop9
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.const $push5=, 12
+; NO-SIMD128-FAST-NEXT: i32.add $push6=, $0, $pop5
+; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push7=, fmaxf, $4, $pop8
+; NO-SIMD128-FAST-NEXT: f32.store 0($pop6), $pop7
+; NO-SIMD128-FAST-NEXT: return
+ %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> <float -1.0, float 0.0, float -1.0, float -1.0>)
+ ret <4 x float> %a
+}
+
+define <4 x float> @maxnum_non_zero_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: maxnum_non_zero_intrinsic_v4f32:
+; SIMD128: .functype maxnum_non_zero_intrinsic_v4f32 (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: v128.const $push0=, -0x1p0, 0x1p0, -0x1p0, -0x1p0
+; SIMD128-NEXT: f32x4.max $push1=, $0, $pop0
+; SIMD128-NEXT: return $pop1
+;
+; SIMD128-FAST-LABEL: maxnum_non_zero_intrinsic_v4f32:
+; SIMD128-FAST: .functype maxnum_non_zero_intrinsic_v4f32 (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: v128.const $push1=, -0x1p0, 0x1p0, -0x1p0, -0x1p0
+; SIMD128-FAST-NEXT: f32x4.max $push0=, $0, $pop1
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: maxnum_non_zero_intrinsic_v4f32:
+; NO-SIMD128: .functype maxnum_non_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0
+; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $pop0
+; NO-SIMD128-NEXT: f32.store 8($0), $pop1
+; NO-SIMD128-NEXT: f32.const $push2=, 0x1p0
+; NO-SIMD128-NEXT: call $push3=, fmaxf, $2, $pop2
+; NO-SIMD128-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0
+; NO-SIMD128-NEXT: call $push4=, fmaxf, $1, $pop9
+; NO-SIMD128-NEXT: f32.store 0($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push5=, 12
+; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
+; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0
+; NO-SIMD128-NEXT: call $push7=, fmaxf, $4, $pop8
+; NO-SIMD128-NEXT: f32.store 0($pop6), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: maxnum_non_zero_intrinsic_v4f32:
+; NO-SIMD128-FAST: .functype maxnum_non_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.const $push0=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push1=, fmaxf, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x1p0
+; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $2, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop9
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.const $push5=, 12
+; NO-SIMD128-FAST-NEXT: i32.add $push6=, $0, $pop5
+; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push7=, fmaxf, $4, $pop8
+; NO-SIMD128-FAST-NEXT: f32.store 0($pop6), $pop7
+; NO-SIMD128-FAST-NEXT: return
+ %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> <float -1.0, float 1.0, float -1.0, float -1.0>)
+ ret <4 x float> %a
+}
+
 define <4 x float> @min_const_intrinsic_v4f32() {
 ; SIMD128-LABEL: min_const_intrinsic_v4f32:
 ; SIMD128: .functype min_const_intrinsic_v4f32 () -> (v128)