diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5042,6 +5042,9 @@
   /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
   SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;
 
+  /// Expand fminimum/fmaximum into multiple comparisons with selects.
+  SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const;
+
   /// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
   /// \param N Node to expand
   /// \returns The expansion result
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3523,6 +3523,12 @@
       Results.push_back(Expanded);
     break;
   }
+  case ISD::FMINIMUM:
+  case ISD::FMAXIMUM: {
+    if (SDValue Expanded = TLI.expandFMINIMUM_FMAXIMUM(Node, DAG))
+      Results.push_back(Expanded);
+    break;
+  }
   case ISD::FSIN:
   case ISD::FCOS: {
     EVT VT = Node->getValueType(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -877,6 +877,13 @@
       return;
     }
     break;
+  case ISD::FMINIMUM:
+  case ISD::FMAXIMUM:
+    if (SDValue Expanded = TLI.expandFMINIMUM_FMAXIMUM(Node, DAG)) {
+      Results.push_back(Expanded);
+      return;
+    }
+    break;
   case ISD::SMIN:
   case ISD::SMAX:
   case ISD::UMIN:
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8151,6 +8151,64 @@
   return SDValue();
 }
 
+SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
+                                                SelectionDAG &DAG) const {
+  SDLoc DL(N);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  unsigned Opc = N->getOpcode();
+  EVT VT = N->getValueType(0);
+  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  bool NoNaN = (N->getFlags().hasNoNaNs() ||
+                (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)));
+  bool NoZeroSign =
+      (N->getFlags().hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
+       DAG.isKnownNeverZeroFloat(RHS));
+  bool IsMax = Opc == ISD::FMAXIMUM;
+
+  if (VT.isVector() &&
+      isOperationLegalOrCustomOrPromote(Opc, VT.getScalarType()))
+    return SDValue();
+
+  SDValue MinMax;
+  if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE,
+                               VT))
+    MinMax = DAG.getNode(IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE, DL, VT,
+                         LHS, RHS);
+  else if (isOperationLegalOrCustom(IsMax ? ISD::FMAXNUM : ISD::FMINNUM, VT))
+    MinMax = DAG.getNode(IsMax ? ISD::FMAXNUM : ISD::FMINNUM, DL, VT, LHS, RHS);
+  else
+    MinMax = DAG.getSelect(
+        DL, VT,
+        DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT), LHS,
+        RHS);
+
+  // Propagate a NaN if either operand is a NaN.
+  if (!NoNaN) {
+    ConstantFP *FPNaN = ConstantFP::get(
+        *DAG.getContext(), APFloat::getNaN(DAG.EVTToAPFloatSemantics(VT)));
+    MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
+                           DAG.getConstantFP(*FPNaN, DL, VT), MinMax);
+  }
+
+  // fminimum/fmaximum consider -0.0 to be less than +0.0.
+  if (!NoZeroSign) {
+    SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
+                                  DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
+    SDValue TestZero =
+        DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
+    SDValue LCmp = DAG.getSelect(
+        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
+        MinMax);
+    SDValue RCmp = DAG.getSelect(
+        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
+        LCmp);
+    MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax);
+  }
+
+  return MinMax;
+}
+
 /// Returns a true value if if this FPClassTest can be performed with an ordered
 /// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
 /// std::nullopt if it cannot be performed as a compare with 0.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1543,15 +1543,11 @@
 
   if (Subtarget->hasNEON()) {
     // vmin and vmax aren't available in a scalar form, so we can use
-    // a NEON instruction with an undef lane instead. This has a performance
-    // penalty on some cores, so we don't do this unless we have been
-    // asked to by the core tuning model.
-    if (Subtarget->useNEONForSinglePrecisionFP()) {
-      setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
-      setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
-      setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
-      setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
-    }
+    // a NEON instruction with an undef lane instead.
+    setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
+    setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
+    setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
+    setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
     setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);
     setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);
     setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -438,8 +438,8 @@
     if (Subtarget.is64Bit())
       setOperationAction(ISD::FPOWI, MVT::i32, Custom);
 
-    if (!Subtarget.hasStdExtZfa())
-      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
+    setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16,
+                       Subtarget.hasStdExtZfa() ?
Legal : Custom); } if (Subtarget.hasStdExtFOrZfinx()) { @@ -462,9 +462,10 @@ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom); setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom); - if (Subtarget.hasStdExtZfa()) + if (Subtarget.hasStdExtZfa()) { setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); - else + setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal); + } else setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom); } @@ -479,6 +480,7 @@ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); setOperationAction(ISD::BITCAST, MVT::i64, Custom); setOperationAction(ISD::BITCAST, MVT::f64, Custom); + setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal); } else { if (Subtarget.is64Bit()) setOperationAction(FPRndMode, MVT::f64, Custom); diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll --- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll +++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll @@ -45,12 +45,10 @@ define float @fminnum32_nsz_intrinsic(float %x, float %y) { ; ARMV7-LABEL: fminnum32_nsz_intrinsic: ; ARMV7: @ %bb.0: -; ARMV7-NEXT: vmov s0, r0 -; ARMV7-NEXT: vmov s2, r1 -; ARMV7-NEXT: vcmp.f32 s0, s2 -; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: vmovlt.f32 s2, s0 -; ARMV7-NEXT: vmov r0, s2 +; ARMV7-NEXT: vmov s0, r1 +; ARMV7-NEXT: vmov s2, r0 +; ARMV7-NEXT: vmin.f32 d0, d1, d0 +; ARMV7-NEXT: vmov r0, s0 ; ARMV7-NEXT: bx lr ; ; ARMV8-LABEL: fminnum32_nsz_intrinsic: @@ -77,9 +75,7 @@ ; ARMV7: @ %bb.0: ; ARMV7-NEXT: vmov.f32 s0, #-1.000000e+00 ; ARMV7-NEXT: vmov s2, r0 -; ARMV7-NEXT: vcmp.f32 s2, s0 -; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: vmovlt.f32 s0, s2 +; ARMV7-NEXT: vmin.f32 d0, d1, d0 ; ARMV7-NEXT: vmov r0, s0 ; ARMV7-NEXT: bx lr ; @@ -135,12 +131,10 @@ define float @fmaxnum32_nsz_intrinsic(float %x, float %y) { ; ARMV7-LABEL: fmaxnum32_nsz_intrinsic: ; ARMV7: @ %bb.0: -; ARMV7-NEXT: vmov s0, r0 -; ARMV7-NEXT: vmov s2, r1 -; ARMV7-NEXT: vcmp.f32 s0, s2 -; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: vmovgt.f32 s2, s0 -; ARMV7-NEXT: vmov r0, s2 +; ARMV7-NEXT: vmov s0, r1 +; ARMV7-NEXT: vmov s2, r0 +; ARMV7-NEXT: vmax.f32 d0, d1, d0 +; ARMV7-NEXT: vmov r0, s0 ; ARMV7-NEXT: bx lr ; ; ARMV8-LABEL: fmaxnum32_nsz_intrinsic: @@ -209,9 +203,7 @@ ; ARMV7: @ %bb.0: ; ARMV7-NEXT: vmov.f32 s0, #1.000000e+00 ; ARMV7-NEXT: vmov s2, r0 -; ARMV7-NEXT: vcmp.f32 s2, s0 -; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: vmovgt.f32 s0, s2 +; ARMV7-NEXT: vmax.f32 d0, d1, d0 ; ARMV7-NEXT: vmov r0, s0 ; ARMV7-NEXT: bx lr ; diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s + +define fp128 @f128_minimum(fp128 %a, fp128 %b) { +; CHECK-LABEL: f128_minimum: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscmpuqp 0, 2, 3 +; CHECK-NEXT: vmr 4, 2 +; CHECK-NEXT: bge 0, .LBB0_8 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bun 0, .LBB0_9 +; CHECK-NEXT: .LBB0_2: # %entry +; CHECK-NEXT: xststdcqp 0, 2, 4 +; CHECK-NEXT: bc 4, 2, .LBB0_10 +; CHECK-NEXT: .LBB0_3: # %entry +; CHECK-NEXT: xststdcqp 0, 3, 4 +; CHECK-NEXT: bc 12, 2, .LBB0_5 +; CHECK-NEXT: .LBB0_4: # %entry +; CHECK-NEXT: vmr 3, 2 +; CHECK-NEXT: .LBB0_5: # 
%entry +; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l +; CHECK-NEXT: lxv 34, 0(3) +; CHECK-NEXT: xscmpuqp 0, 4, 2 +; CHECK-NEXT: beq 0, .LBB0_7 +; CHECK-NEXT: # %bb.6: # %entry +; CHECK-NEXT: vmr 3, 4 +; CHECK-NEXT: .LBB0_7: # %entry +; CHECK-NEXT: vmr 2, 3 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB0_8: # %entry +; CHECK-NEXT: vmr 4, 3 +; CHECK-NEXT: bnu 0, .LBB0_2 +; CHECK-NEXT: .LBB0_9: +; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK-NEXT: lxv 36, 0(3) +; CHECK-NEXT: xststdcqp 0, 2, 4 +; CHECK-NEXT: bc 12, 2, .LBB0_3 +; CHECK-NEXT: .LBB0_10: # %entry +; CHECK-NEXT: vmr 2, 4 +; CHECK-NEXT: xststdcqp 0, 3, 4 +; CHECK-NEXT: bc 4, 2, .LBB0_4 +; CHECK-NEXT: b .LBB0_5 +entry: + %m = call fp128 @llvm.minimum.f128(fp128 %a, fp128 %b) + ret fp128 %m +} + +define fp128 @f128_maximum(fp128 %a, fp128 %b) { +; CHECK-LABEL: f128_maximum: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscmpuqp 0, 2, 3 +; CHECK-NEXT: vmr 4, 2 +; CHECK-NEXT: ble 0, .LBB1_8 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bun 0, .LBB1_9 +; CHECK-NEXT: .LBB1_2: # %entry +; CHECK-NEXT: xststdcqp 0, 2, 8 +; CHECK-NEXT: bc 4, 2, .LBB1_10 +; CHECK-NEXT: .LBB1_3: # %entry +; CHECK-NEXT: xststdcqp 0, 3, 8 +; CHECK-NEXT: bc 12, 2, .LBB1_5 +; CHECK-NEXT: .LBB1_4: # %entry +; CHECK-NEXT: vmr 3, 2 +; CHECK-NEXT: .LBB1_5: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI1_1@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI1_1@toc@l +; CHECK-NEXT: lxv 34, 0(3) +; CHECK-NEXT: xscmpuqp 0, 4, 2 +; CHECK-NEXT: beq 0, .LBB1_7 +; CHECK-NEXT: # %bb.6: # %entry +; CHECK-NEXT: vmr 3, 4 +; CHECK-NEXT: .LBB1_7: # %entry +; CHECK-NEXT: vmr 2, 3 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB1_8: # %entry +; CHECK-NEXT: vmr 4, 3 +; CHECK-NEXT: bnu 0, .LBB1_2 +; CHECK-NEXT: .LBB1_9: +; CHECK-NEXT: addis 3, 2, .LCPI1_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI1_0@toc@l +; CHECK-NEXT: lxv 36, 0(3) +; CHECK-NEXT: xststdcqp 0, 2, 8 +; CHECK-NEXT: bc 12, 2, .LBB1_3 +; CHECK-NEXT: .LBB1_10: # %entry +; CHECK-NEXT: vmr 2, 4 +; CHECK-NEXT: xststdcqp 0, 3, 8 +; CHECK-NEXT: bc 4, 2, .LBB1_4 +; CHECK-NEXT: b .LBB1_5 +entry: + %m = call fp128 @llvm.maximum.f128(fp128 %a, fp128 %b) + ret fp128 %m +} + +declare fp128 @llvm.minimum.f128(fp128, fp128) +declare fp128 @llvm.maximum.f128(fp128, fp128) diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll @@ -0,0 +1,847 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s --check-prefix=NOVSX +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s --check-prefix=VSX +; RUN: llc -mtriple=powerpc64-ibm-aix -mcpu=pwr8 < %s | FileCheck %s --check-prefix=AIX + +define float @f32_minimum(float %a, float %b) { +; NOVSX-LABEL: f32_minimum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: fmr 0, 1 +; NOVSX-NEXT: stfs 2, -8(1) +; NOVSX-NEXT: stfs 1, -4(1) +; NOVSX-NEXT: bc 12, 0, .LBB0_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: .LBB0_2: # %entry +; NOVSX-NEXT: lwz 3, -4(1) +; NOVSX-NEXT: bc 4, 3, .LBB0_4 +; NOVSX-NEXT: # %bb.3: +; NOVSX-NEXT: addis 4, 2, .LCPI0_0@toc@ha +; NOVSX-NEXT: lfs 0, .LCPI0_0@toc@l(4) +; NOVSX-NEXT: .LBB0_4: # %entry +; NOVSX-NEXT: xoris 3, 3, 32768 +; NOVSX-NEXT: cmplwi 3, 0 +; NOVSX-NEXT: lwz 3, 
-8(1) +; NOVSX-NEXT: bc 12, 2, .LBB0_6 +; NOVSX-NEXT: # %bb.5: # %entry +; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: .LBB0_6: # %entry +; NOVSX-NEXT: xoris 3, 3, 32768 +; NOVSX-NEXT: cmplwi 3, 0 +; NOVSX-NEXT: bc 12, 2, .LBB0_8 +; NOVSX-NEXT: # %bb.7: # %entry +; NOVSX-NEXT: fmr 2, 1 +; NOVSX-NEXT: .LBB0_8: # %entry +; NOVSX-NEXT: addis 3, 2, .LCPI0_1@toc@ha +; NOVSX-NEXT: lfs 1, .LCPI0_1@toc@l(3) +; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: bc 12, 2, .LBB0_10 +; NOVSX-NEXT: # %bb.9: # %entry +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: .LBB0_10: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: blr +; +; VSX-LABEL: f32_minimum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: fcmpu 0, 1, 2 +; VSX-NEXT: xscvdpspn 0, 1 +; VSX-NEXT: xscvdpspn 3, 2 +; VSX-NEXT: mffprwz 3, 0 +; VSX-NEXT: bc 12, 3, .LBB0_2 +; VSX-NEXT: # %bb.1: # %entry +; VSX-NEXT: xsmindp 0, 1, 2 +; VSX-NEXT: b .LBB0_3 +; VSX-NEXT: .LBB0_2: +; VSX-NEXT: addis 4, 2, .LCPI0_0@toc@ha +; VSX-NEXT: lfs 0, .LCPI0_0@toc@l(4) +; VSX-NEXT: .LBB0_3: # %entry +; VSX-NEXT: xoris 3, 3, 32768 +; VSX-NEXT: cmplwi 3, 0 +; VSX-NEXT: mffprwz 3, 3 +; VSX-NEXT: bc 12, 2, .LBB0_5 +; VSX-NEXT: # %bb.4: # %entry +; VSX-NEXT: fmr 1, 0 +; VSX-NEXT: .LBB0_5: # %entry +; VSX-NEXT: xoris 3, 3, 32768 +; VSX-NEXT: cmplwi 3, 0 +; VSX-NEXT: bc 12, 2, .LBB0_7 +; VSX-NEXT: # %bb.6: # %entry +; VSX-NEXT: fmr 2, 1 +; VSX-NEXT: .LBB0_7: # %entry +; VSX-NEXT: xxlxor 1, 1, 1 +; VSX-NEXT: fcmpu 0, 0, 1 +; VSX-NEXT: bc 12, 2, .LBB0_9 +; VSX-NEXT: # %bb.8: # %entry +; VSX-NEXT: fmr 2, 0 +; VSX-NEXT: .LBB0_9: # %entry +; VSX-NEXT: fmr 1, 2 +; VSX-NEXT: blr +; +; AIX-LABEL: f32_minimum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: fcmpu 0, 1, 2 +; AIX-NEXT: xscvdpspn 0, 1 +; AIX-NEXT: xscvdpspn 3, 2 +; AIX-NEXT: mffprwz 3, 0 +; AIX-NEXT: bc 12, 3, L..BB0_2 +; AIX-NEXT: # %bb.1: # %entry +; AIX-NEXT: xsmindp 0, 1, 2 +; AIX-NEXT: b L..BB0_3 +; AIX-NEXT: L..BB0_2: +; AIX-NEXT: ld 4, L..C0(2) # %const.0 +; AIX-NEXT: lfs 0, 0(4) +; AIX-NEXT: L..BB0_3: # %entry +; AIX-NEXT: xoris 3, 3, 32768 +; AIX-NEXT: cmplwi 3, 0 +; AIX-NEXT: mffprwz 3, 3 +; AIX-NEXT: bc 12, 2, L..BB0_5 +; AIX-NEXT: # %bb.4: # %entry +; AIX-NEXT: fmr 1, 0 +; AIX-NEXT: L..BB0_5: # %entry +; AIX-NEXT: xoris 3, 3, 32768 +; AIX-NEXT: cmplwi 3, 0 +; AIX-NEXT: bc 12, 2, L..BB0_7 +; AIX-NEXT: # %bb.6: # %entry +; AIX-NEXT: fmr 2, 1 +; AIX-NEXT: L..BB0_7: # %entry +; AIX-NEXT: xxlxor 1, 1, 1 +; AIX-NEXT: fcmpu 0, 0, 1 +; AIX-NEXT: bc 12, 2, L..BB0_9 +; AIX-NEXT: # %bb.8: # %entry +; AIX-NEXT: fmr 2, 0 +; AIX-NEXT: L..BB0_9: # %entry +; AIX-NEXT: fmr 1, 2 +; AIX-NEXT: blr +entry: + %m = call float @llvm.minimum.f32(float %a, float %b) + ret float %m +} + +define float @f32_maximum(float %a, float %b) { +; NOVSX-LABEL: f32_maximum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: fmr 0, 1 +; NOVSX-NEXT: stfs 2, -8(1) +; NOVSX-NEXT: stfs 1, -4(1) +; NOVSX-NEXT: bc 12, 1, .LBB1_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: .LBB1_2: # %entry +; NOVSX-NEXT: lwz 3, -4(1) +; NOVSX-NEXT: bc 4, 3, .LBB1_4 +; NOVSX-NEXT: # %bb.3: +; NOVSX-NEXT: addis 4, 2, .LCPI1_0@toc@ha +; NOVSX-NEXT: lfs 0, .LCPI1_0@toc@l(4) +; NOVSX-NEXT: .LBB1_4: # %entry +; NOVSX-NEXT: cmpwi 3, 0 +; NOVSX-NEXT: lwz 3, -8(1) +; NOVSX-NEXT: bc 12, 2, .LBB1_6 +; NOVSX-NEXT: # %bb.5: # %entry +; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: .LBB1_6: # %entry +; NOVSX-NEXT: cmpwi 3, 0 +; NOVSX-NEXT: bc 12, 2, .LBB1_8 +; NOVSX-NEXT: # %bb.7: # %entry +; NOVSX-NEXT: fmr 2, 1 +; NOVSX-NEXT: .LBB1_8: # %entry +; NOVSX-NEXT: addis 3, 2, 
.LCPI1_1@toc@ha +; NOVSX-NEXT: lfs 1, .LCPI1_1@toc@l(3) +; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: bc 12, 2, .LBB1_10 +; NOVSX-NEXT: # %bb.9: # %entry +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: .LBB1_10: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: blr +; +; VSX-LABEL: f32_maximum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: fcmpu 0, 1, 2 +; VSX-NEXT: xscvdpspn 0, 1 +; VSX-NEXT: xscvdpspn 3, 2 +; VSX-NEXT: mffprwz 3, 0 +; VSX-NEXT: bc 12, 3, .LBB1_2 +; VSX-NEXT: # %bb.1: # %entry +; VSX-NEXT: xsmaxdp 0, 1, 2 +; VSX-NEXT: b .LBB1_3 +; VSX-NEXT: .LBB1_2: +; VSX-NEXT: addis 4, 2, .LCPI1_0@toc@ha +; VSX-NEXT: lfs 0, .LCPI1_0@toc@l(4) +; VSX-NEXT: .LBB1_3: # %entry +; VSX-NEXT: cmpwi 3, 0 +; VSX-NEXT: mffprwz 3, 3 +; VSX-NEXT: bc 12, 2, .LBB1_5 +; VSX-NEXT: # %bb.4: # %entry +; VSX-NEXT: fmr 1, 0 +; VSX-NEXT: .LBB1_5: # %entry +; VSX-NEXT: cmpwi 3, 0 +; VSX-NEXT: bc 12, 2, .LBB1_7 +; VSX-NEXT: # %bb.6: # %entry +; VSX-NEXT: fmr 2, 1 +; VSX-NEXT: .LBB1_7: # %entry +; VSX-NEXT: xxlxor 1, 1, 1 +; VSX-NEXT: fcmpu 0, 0, 1 +; VSX-NEXT: bc 12, 2, .LBB1_9 +; VSX-NEXT: # %bb.8: # %entry +; VSX-NEXT: fmr 2, 0 +; VSX-NEXT: .LBB1_9: # %entry +; VSX-NEXT: fmr 1, 2 +; VSX-NEXT: blr +; +; AIX-LABEL: f32_maximum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: fcmpu 0, 1, 2 +; AIX-NEXT: xscvdpspn 0, 1 +; AIX-NEXT: xscvdpspn 3, 2 +; AIX-NEXT: mffprwz 3, 0 +; AIX-NEXT: bc 12, 3, L..BB1_2 +; AIX-NEXT: # %bb.1: # %entry +; AIX-NEXT: xsmaxdp 0, 1, 2 +; AIX-NEXT: b L..BB1_3 +; AIX-NEXT: L..BB1_2: +; AIX-NEXT: ld 4, L..C1(2) # %const.0 +; AIX-NEXT: lfs 0, 0(4) +; AIX-NEXT: L..BB1_3: # %entry +; AIX-NEXT: cmpwi 3, 0 +; AIX-NEXT: mffprwz 3, 3 +; AIX-NEXT: bc 12, 2, L..BB1_5 +; AIX-NEXT: # %bb.4: # %entry +; AIX-NEXT: fmr 1, 0 +; AIX-NEXT: L..BB1_5: # %entry +; AIX-NEXT: cmpwi 3, 0 +; AIX-NEXT: bc 12, 2, L..BB1_7 +; AIX-NEXT: # %bb.6: # %entry +; AIX-NEXT: fmr 2, 1 +; AIX-NEXT: L..BB1_7: # %entry +; AIX-NEXT: xxlxor 1, 1, 1 +; AIX-NEXT: fcmpu 0, 0, 1 +; AIX-NEXT: bc 12, 2, L..BB1_9 +; AIX-NEXT: # %bb.8: # %entry +; AIX-NEXT: fmr 2, 0 +; AIX-NEXT: L..BB1_9: # %entry +; AIX-NEXT: fmr 1, 2 +; AIX-NEXT: blr +entry: + %m = call float @llvm.maximum.f32(float %a, float %b) + ret float %m +} + +define double @f64_minimum(double %a, double %b) { +; NOVSX-LABEL: f64_minimum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: fmr 0, 1 +; NOVSX-NEXT: stfd 2, -16(1) +; NOVSX-NEXT: stfd 1, -8(1) +; NOVSX-NEXT: bc 12, 0, .LBB2_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: .LBB2_2: # %entry +; NOVSX-NEXT: ld 3, -8(1) +; NOVSX-NEXT: bc 4, 3, .LBB2_4 +; NOVSX-NEXT: # %bb.3: +; NOVSX-NEXT: addis 4, 2, .LCPI2_0@toc@ha +; NOVSX-NEXT: lfs 0, .LCPI2_0@toc@l(4) +; NOVSX-NEXT: .LBB2_4: # %entry +; NOVSX-NEXT: li 4, 1 +; NOVSX-NEXT: rldic 4, 4, 63, 0 +; NOVSX-NEXT: cmpd 3, 4 +; NOVSX-NEXT: ld 3, -16(1) +; NOVSX-NEXT: bc 12, 2, .LBB2_6 +; NOVSX-NEXT: # %bb.5: # %entry +; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: .LBB2_6: # %entry +; NOVSX-NEXT: cmpd 3, 4 +; NOVSX-NEXT: bc 12, 2, .LBB2_8 +; NOVSX-NEXT: # %bb.7: # %entry +; NOVSX-NEXT: fmr 2, 1 +; NOVSX-NEXT: .LBB2_8: # %entry +; NOVSX-NEXT: addis 3, 2, .LCPI2_1@toc@ha +; NOVSX-NEXT: lfs 1, .LCPI2_1@toc@l(3) +; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: bc 12, 2, .LBB2_10 +; NOVSX-NEXT: # %bb.9: # %entry +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: .LBB2_10: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: blr +; +; VSX-LABEL: f64_minimum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: fcmpu 0, 1, 2 +; VSX-NEXT: mffprd 3, 1 +; VSX-NEXT: bc 12, 3, .LBB2_2 +; VSX-NEXT: # %bb.1: # %entry 
+; VSX-NEXT: xsmindp 0, 1, 2 +; VSX-NEXT: b .LBB2_3 +; VSX-NEXT: .LBB2_2: +; VSX-NEXT: addis 4, 2, .LCPI2_0@toc@ha +; VSX-NEXT: lfs 0, .LCPI2_0@toc@l(4) +; VSX-NEXT: .LBB2_3: # %entry +; VSX-NEXT: li 4, 1 +; VSX-NEXT: rldic 4, 4, 63, 0 +; VSX-NEXT: cmpd 3, 4 +; VSX-NEXT: mffprd 3, 2 +; VSX-NEXT: bc 12, 2, .LBB2_5 +; VSX-NEXT: # %bb.4: # %entry +; VSX-NEXT: fmr 1, 0 +; VSX-NEXT: .LBB2_5: # %entry +; VSX-NEXT: cmpd 3, 4 +; VSX-NEXT: bc 12, 2, .LBB2_7 +; VSX-NEXT: # %bb.6: # %entry +; VSX-NEXT: fmr 2, 1 +; VSX-NEXT: .LBB2_7: # %entry +; VSX-NEXT: xxlxor 1, 1, 1 +; VSX-NEXT: fcmpu 0, 0, 1 +; VSX-NEXT: bc 12, 2, .LBB2_9 +; VSX-NEXT: # %bb.8: # %entry +; VSX-NEXT: fmr 2, 0 +; VSX-NEXT: .LBB2_9: # %entry +; VSX-NEXT: fmr 1, 2 +; VSX-NEXT: blr +; +; AIX-LABEL: f64_minimum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: fcmpu 0, 1, 2 +; AIX-NEXT: mffprd 3, 1 +; AIX-NEXT: bc 12, 3, L..BB2_2 +; AIX-NEXT: # %bb.1: # %entry +; AIX-NEXT: xsmindp 0, 1, 2 +; AIX-NEXT: b L..BB2_3 +; AIX-NEXT: L..BB2_2: +; AIX-NEXT: ld 4, L..C2(2) # %const.0 +; AIX-NEXT: lfs 0, 0(4) +; AIX-NEXT: L..BB2_3: # %entry +; AIX-NEXT: li 4, 1 +; AIX-NEXT: rldic 4, 4, 63, 0 +; AIX-NEXT: cmpd 3, 4 +; AIX-NEXT: mffprd 3, 2 +; AIX-NEXT: bc 12, 2, L..BB2_5 +; AIX-NEXT: # %bb.4: # %entry +; AIX-NEXT: fmr 1, 0 +; AIX-NEXT: L..BB2_5: # %entry +; AIX-NEXT: cmpd 3, 4 +; AIX-NEXT: bc 12, 2, L..BB2_7 +; AIX-NEXT: # %bb.6: # %entry +; AIX-NEXT: fmr 2, 1 +; AIX-NEXT: L..BB2_7: # %entry +; AIX-NEXT: xxlxor 1, 1, 1 +; AIX-NEXT: fcmpu 0, 0, 1 +; AIX-NEXT: bc 12, 2, L..BB2_9 +; AIX-NEXT: # %bb.8: # %entry +; AIX-NEXT: fmr 2, 0 +; AIX-NEXT: L..BB2_9: # %entry +; AIX-NEXT: fmr 1, 2 +; AIX-NEXT: blr +entry: + %m = call double @llvm.minimum.f64(double %a, double %b) + ret double %m +} + +define double @f64_maximum(double %a, double %b) { +; NOVSX-LABEL: f64_maximum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: fmr 0, 1 +; NOVSX-NEXT: stfd 2, -16(1) +; NOVSX-NEXT: stfd 1, -8(1) +; NOVSX-NEXT: bc 12, 1, .LBB3_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: .LBB3_2: # %entry +; NOVSX-NEXT: ld 3, -8(1) +; NOVSX-NEXT: bc 4, 3, .LBB3_4 +; NOVSX-NEXT: # %bb.3: +; NOVSX-NEXT: addis 4, 2, .LCPI3_0@toc@ha +; NOVSX-NEXT: lfs 0, .LCPI3_0@toc@l(4) +; NOVSX-NEXT: .LBB3_4: # %entry +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: ld 3, -16(1) +; NOVSX-NEXT: bc 12, 2, .LBB3_6 +; NOVSX-NEXT: # %bb.5: # %entry +; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: .LBB3_6: # %entry +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: bc 12, 2, .LBB3_8 +; NOVSX-NEXT: # %bb.7: # %entry +; NOVSX-NEXT: fmr 2, 1 +; NOVSX-NEXT: .LBB3_8: # %entry +; NOVSX-NEXT: addis 3, 2, .LCPI3_1@toc@ha +; NOVSX-NEXT: lfs 1, .LCPI3_1@toc@l(3) +; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: bc 12, 2, .LBB3_10 +; NOVSX-NEXT: # %bb.9: # %entry +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: .LBB3_10: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: blr +; +; VSX-LABEL: f64_maximum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: fcmpu 0, 1, 2 +; VSX-NEXT: mffprd 3, 1 +; VSX-NEXT: bc 12, 3, .LBB3_2 +; VSX-NEXT: # %bb.1: # %entry +; VSX-NEXT: xsmaxdp 0, 1, 2 +; VSX-NEXT: b .LBB3_3 +; VSX-NEXT: .LBB3_2: +; VSX-NEXT: addis 4, 2, .LCPI3_0@toc@ha +; VSX-NEXT: lfs 0, .LCPI3_0@toc@l(4) +; VSX-NEXT: .LBB3_3: # %entry +; VSX-NEXT: cmpdi 3, 0 +; VSX-NEXT: mffprd 3, 2 +; VSX-NEXT: bc 12, 2, .LBB3_5 +; VSX-NEXT: # %bb.4: # %entry +; VSX-NEXT: fmr 1, 0 +; VSX-NEXT: .LBB3_5: # %entry +; VSX-NEXT: cmpdi 3, 0 +; VSX-NEXT: bc 12, 2, .LBB3_7 +; VSX-NEXT: # %bb.6: # %entry +; VSX-NEXT: fmr 2, 1 +; VSX-NEXT: .LBB3_7: # %entry +; 
VSX-NEXT: xxlxor 1, 1, 1 +; VSX-NEXT: fcmpu 0, 0, 1 +; VSX-NEXT: bc 12, 2, .LBB3_9 +; VSX-NEXT: # %bb.8: # %entry +; VSX-NEXT: fmr 2, 0 +; VSX-NEXT: .LBB3_9: # %entry +; VSX-NEXT: fmr 1, 2 +; VSX-NEXT: blr +; +; AIX-LABEL: f64_maximum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: fcmpu 0, 1, 2 +; AIX-NEXT: mffprd 3, 1 +; AIX-NEXT: bc 12, 3, L..BB3_2 +; AIX-NEXT: # %bb.1: # %entry +; AIX-NEXT: xsmaxdp 0, 1, 2 +; AIX-NEXT: b L..BB3_3 +; AIX-NEXT: L..BB3_2: +; AIX-NEXT: ld 4, L..C3(2) # %const.0 +; AIX-NEXT: lfs 0, 0(4) +; AIX-NEXT: L..BB3_3: # %entry +; AIX-NEXT: cmpdi 3, 0 +; AIX-NEXT: mffprd 3, 2 +; AIX-NEXT: bc 12, 2, L..BB3_5 +; AIX-NEXT: # %bb.4: # %entry +; AIX-NEXT: fmr 1, 0 +; AIX-NEXT: L..BB3_5: # %entry +; AIX-NEXT: cmpdi 3, 0 +; AIX-NEXT: bc 12, 2, L..BB3_7 +; AIX-NEXT: # %bb.6: # %entry +; AIX-NEXT: fmr 2, 1 +; AIX-NEXT: L..BB3_7: # %entry +; AIX-NEXT: xxlxor 1, 1, 1 +; AIX-NEXT: fcmpu 0, 0, 1 +; AIX-NEXT: bc 12, 2, L..BB3_9 +; AIX-NEXT: # %bb.8: # %entry +; AIX-NEXT: fmr 2, 0 +; AIX-NEXT: L..BB3_9: # %entry +; AIX-NEXT: fmr 1, 2 +; AIX-NEXT: blr +entry: + %m = call double @llvm.maximum.f64(double %a, double %b) + ret double %m +} + +define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) { +; NOVSX-LABEL: v4f32_minimum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: vcmpeqfp 5, 3, 3 +; NOVSX-NEXT: vspltisb 4, -1 +; NOVSX-NEXT: addis 3, 2, .LCPI4_0@toc@ha +; NOVSX-NEXT: vcmpeqfp 0, 2, 2 +; NOVSX-NEXT: addi 3, 3, .LCPI4_0@toc@l +; NOVSX-NEXT: vcmpgtfp 1, 3, 2 +; NOVSX-NEXT: vslw 4, 4, 4 +; NOVSX-NEXT: vnot 5, 5 +; NOVSX-NEXT: vnot 0, 0 +; NOVSX-NEXT: vsel 1, 3, 2, 1 +; NOVSX-NEXT: vor 5, 0, 5 +; NOVSX-NEXT: lvx 0, 0, 3 +; NOVSX-NEXT: vsel 5, 1, 0, 5 +; NOVSX-NEXT: vcmpequw 0, 2, 4 +; NOVSX-NEXT: vcmpequw 4, 3, 4 +; NOVSX-NEXT: vsel 2, 5, 2, 0 +; NOVSX-NEXT: vxor 0, 0, 0 +; NOVSX-NEXT: vsel 2, 2, 3, 4 +; NOVSX-NEXT: vcmpeqfp 3, 5, 0 +; NOVSX-NEXT: vsel 2, 5, 2, 3 +; NOVSX-NEXT: blr +; +; VSX-LABEL: v4f32_minimum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: xxleqv 36, 36, 36 +; VSX-NEXT: addis 3, 2, .LCPI4_0@toc@ha +; VSX-NEXT: xvcmpeqsp 0, 35, 35 +; VSX-NEXT: addi 3, 3, .LCPI4_0@toc@l +; VSX-NEXT: xvcmpeqsp 1, 34, 34 +; VSX-NEXT: lxvd2x 3, 0, 3 +; VSX-NEXT: vslw 4, 4, 4 +; VSX-NEXT: xvminsp 2, 34, 35 +; VSX-NEXT: xxlnor 0, 0, 0 +; VSX-NEXT: xxlnor 1, 1, 1 +; VSX-NEXT: vcmpequw 5, 2, 4 +; VSX-NEXT: xxlor 0, 1, 0 +; VSX-NEXT: vcmpequw 4, 3, 4 +; VSX-NEXT: xxsel 0, 2, 3, 0 +; VSX-NEXT: xxlxor 1, 1, 1 +; VSX-NEXT: xxsel 2, 0, 34, 37 +; VSX-NEXT: xvcmpeqsp 1, 0, 1 +; VSX-NEXT: xxsel 2, 2, 35, 36 +; VSX-NEXT: xxsel 34, 0, 2, 1 +; VSX-NEXT: blr +; +; AIX-LABEL: v4f32_minimum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: xxleqv 36, 36, 36 +; AIX-NEXT: ld 3, L..C4(2) # %const.0 +; AIX-NEXT: xvcmpeqsp 0, 35, 35 +; AIX-NEXT: xvcmpeqsp 1, 34, 34 +; AIX-NEXT: vslw 4, 4, 4 +; AIX-NEXT: lxvw4x 3, 0, 3 +; AIX-NEXT: xvminsp 2, 34, 35 +; AIX-NEXT: xxlnor 0, 0, 0 +; AIX-NEXT: xxlnor 1, 1, 1 +; AIX-NEXT: vcmpequw 5, 2, 4 +; AIX-NEXT: xxlor 0, 1, 0 +; AIX-NEXT: vcmpequw 4, 3, 4 +; AIX-NEXT: xxsel 0, 2, 3, 0 +; AIX-NEXT: xxlxor 1, 1, 1 +; AIX-NEXT: xxsel 2, 0, 34, 37 +; AIX-NEXT: xvcmpeqsp 1, 0, 1 +; AIX-NEXT: xxsel 2, 2, 35, 36 +; AIX-NEXT: xxsel 34, 0, 2, 1 +; AIX-NEXT: blr +entry: + %m = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %m +} + +define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) { +; NOVSX-LABEL: v4f32_maximum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: vcmpeqfp 4, 3, 3 +; NOVSX-NEXT: addis 3, 2, .LCPI5_0@toc@ha +; NOVSX-NEXT: vcmpeqfp 5, 2, 2 
+; NOVSX-NEXT: addi 3, 3, .LCPI5_0@toc@l +; NOVSX-NEXT: vcmpgtfp 0, 2, 3 +; NOVSX-NEXT: lvx 1, 0, 3 +; NOVSX-NEXT: vnot 4, 4 +; NOVSX-NEXT: vnot 5, 5 +; NOVSX-NEXT: vsel 0, 3, 2, 0 +; NOVSX-NEXT: vor 4, 5, 4 +; NOVSX-NEXT: vxor 5, 5, 5 +; NOVSX-NEXT: vsel 4, 0, 1, 4 +; NOVSX-NEXT: vcmpequw 0, 2, 5 +; NOVSX-NEXT: vsel 2, 4, 2, 0 +; NOVSX-NEXT: vcmpequw 0, 3, 5 +; NOVSX-NEXT: vsel 2, 2, 3, 0 +; NOVSX-NEXT: vcmpeqfp 3, 4, 5 +; NOVSX-NEXT: vsel 2, 4, 2, 3 +; NOVSX-NEXT: blr +; +; VSX-LABEL: v4f32_maximum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: xvcmpeqsp 0, 35, 35 +; VSX-NEXT: addis 3, 2, .LCPI5_0@toc@ha +; VSX-NEXT: xvcmpeqsp 1, 34, 34 +; VSX-NEXT: addi 3, 3, .LCPI5_0@toc@l +; VSX-NEXT: xvmaxsp 2, 34, 35 +; VSX-NEXT: lxvd2x 3, 0, 3 +; VSX-NEXT: xxlxor 36, 36, 36 +; VSX-NEXT: vcmpequw 5, 2, 4 +; VSX-NEXT: xxlnor 0, 0, 0 +; VSX-NEXT: xxlnor 1, 1, 1 +; VSX-NEXT: vcmpequw 0, 3, 4 +; VSX-NEXT: xxlor 0, 1, 0 +; VSX-NEXT: xxsel 0, 2, 3, 0 +; VSX-NEXT: xxsel 1, 0, 34, 37 +; VSX-NEXT: xvcmpeqsp 2, 0, 36 +; VSX-NEXT: xxsel 1, 1, 35, 32 +; VSX-NEXT: xxsel 34, 0, 1, 2 +; VSX-NEXT: blr +; +; AIX-LABEL: v4f32_maximum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: xvcmpeqsp 0, 35, 35 +; AIX-NEXT: ld 3, L..C5(2) # %const.0 +; AIX-NEXT: xvcmpeqsp 1, 34, 34 +; AIX-NEXT: xvmaxsp 2, 34, 35 +; AIX-NEXT: xxlxor 36, 36, 36 +; AIX-NEXT: lxvw4x 3, 0, 3 +; AIX-NEXT: vcmpequw 5, 2, 4 +; AIX-NEXT: xxlnor 0, 0, 0 +; AIX-NEXT: xxlnor 1, 1, 1 +; AIX-NEXT: vcmpequw 0, 3, 4 +; AIX-NEXT: xxlor 0, 1, 0 +; AIX-NEXT: xxsel 0, 2, 3, 0 +; AIX-NEXT: xxsel 1, 0, 34, 37 +; AIX-NEXT: xvcmpeqsp 2, 0, 36 +; AIX-NEXT: xxsel 1, 1, 35, 32 +; AIX-NEXT: xxsel 34, 0, 1, 2 +; AIX-NEXT: blr +entry: + %m = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %m +} + +define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) { +; NOVSX-LABEL: v2f64_minimum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fcmpu 0, 1, 3 +; NOVSX-NEXT: fmr 6, 1 +; NOVSX-NEXT: stfd 4, -16(1) +; NOVSX-NEXT: stfd 2, -8(1) +; NOVSX-NEXT: stfd 3, -32(1) +; NOVSX-NEXT: stfd 1, -24(1) +; NOVSX-NEXT: bc 12, 0, .LBB6_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 6, 3 +; NOVSX-NEXT: .LBB6_2: # %entry +; NOVSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha +; NOVSX-NEXT: ld 4, -24(1) +; NOVSX-NEXT: lfs 0, .LCPI6_0@toc@l(3) +; NOVSX-NEXT: fmr 5, 0 +; NOVSX-NEXT: bc 12, 3, .LBB6_4 +; NOVSX-NEXT: # %bb.3: # %entry +; NOVSX-NEXT: fmr 5, 6 +; NOVSX-NEXT: .LBB6_4: # %entry +; NOVSX-NEXT: li 3, 1 +; NOVSX-NEXT: rldic 3, 3, 63, 0 +; NOVSX-NEXT: cmpd 4, 3 +; NOVSX-NEXT: ld 4, -32(1) +; NOVSX-NEXT: bc 12, 2, .LBB6_6 +; NOVSX-NEXT: # %bb.5: # %entry +; NOVSX-NEXT: fmr 1, 5 +; NOVSX-NEXT: .LBB6_6: # %entry +; NOVSX-NEXT: cmpd 4, 3 +; NOVSX-NEXT: bc 12, 2, .LBB6_8 +; NOVSX-NEXT: # %bb.7: # %entry +; NOVSX-NEXT: fmr 3, 1 +; NOVSX-NEXT: .LBB6_8: # %entry +; NOVSX-NEXT: addis 4, 2, .LCPI6_1@toc@ha +; NOVSX-NEXT: lfs 1, .LCPI6_1@toc@l(4) +; NOVSX-NEXT: fcmpu 0, 5, 1 +; NOVSX-NEXT: bc 12, 2, .LBB6_10 +; NOVSX-NEXT: # %bb.9: # %entry +; NOVSX-NEXT: fmr 3, 5 +; NOVSX-NEXT: .LBB6_10: # %entry +; NOVSX-NEXT: fcmpu 0, 2, 4 +; NOVSX-NEXT: fmr 5, 2 +; NOVSX-NEXT: bc 12, 0, .LBB6_12 +; NOVSX-NEXT: # %bb.11: # %entry +; NOVSX-NEXT: fmr 5, 4 +; NOVSX-NEXT: .LBB6_12: # %entry +; NOVSX-NEXT: ld 4, -8(1) +; NOVSX-NEXT: bc 12, 3, .LBB6_14 +; NOVSX-NEXT: # %bb.13: # %entry +; NOVSX-NEXT: fmr 0, 5 +; NOVSX-NEXT: .LBB6_14: # %entry +; NOVSX-NEXT: cmpd 4, 3 +; NOVSX-NEXT: ld 4, -16(1) +; NOVSX-NEXT: bc 4, 2, .LBB6_19 +; NOVSX-NEXT: # %bb.15: # %entry +; NOVSX-NEXT: cmpd 4, 3 +; 
NOVSX-NEXT: bc 4, 2, .LBB6_20 +; NOVSX-NEXT: .LBB6_16: # %entry +; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: bc 12, 2, .LBB6_18 +; NOVSX-NEXT: .LBB6_17: # %entry +; NOVSX-NEXT: fmr 4, 0 +; NOVSX-NEXT: .LBB6_18: # %entry +; NOVSX-NEXT: fmr 1, 3 +; NOVSX-NEXT: fmr 2, 4 +; NOVSX-NEXT: blr +; NOVSX-NEXT: .LBB6_19: # %entry +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: cmpd 4, 3 +; NOVSX-NEXT: bc 12, 2, .LBB6_16 +; NOVSX-NEXT: .LBB6_20: # %entry +; NOVSX-NEXT: fmr 4, 2 +; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: bc 4, 2, .LBB6_17 +; NOVSX-NEXT: b .LBB6_18 +; +; VSX-LABEL: v2f64_minimum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: xvcmpeqdp 36, 35, 35 +; VSX-NEXT: addis 3, 2, .LCPI6_1@toc@ha +; VSX-NEXT: xvcmpeqdp 37, 34, 34 +; VSX-NEXT: addi 3, 3, .LCPI6_1@toc@l +; VSX-NEXT: xvmindp 0, 34, 35 +; VSX-NEXT: lxvd2x 32, 0, 3 +; VSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha +; VSX-NEXT: addi 3, 3, .LCPI6_0@toc@l +; VSX-NEXT: lxvd2x 1, 0, 3 +; VSX-NEXT: vcmpequd 1, 2, 0 +; VSX-NEXT: xxlnor 36, 36, 36 +; VSX-NEXT: xxlnor 37, 37, 37 +; VSX-NEXT: xxlor 2, 37, 36 +; VSX-NEXT: vcmpequd 4, 3, 0 +; VSX-NEXT: xxsel 0, 0, 1, 2 +; VSX-NEXT: xxlxor 1, 1, 1 +; VSX-NEXT: xxsel 2, 0, 34, 33 +; VSX-NEXT: xvcmpeqdp 34, 0, 1 +; VSX-NEXT: xxsel 1, 2, 35, 36 +; VSX-NEXT: xxsel 34, 0, 1, 34 +; VSX-NEXT: blr +; +; AIX-LABEL: v2f64_minimum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: xvcmpeqdp 36, 35, 35 +; AIX-NEXT: ld 3, L..C6(2) # %const.1 +; AIX-NEXT: xvcmpeqdp 37, 34, 34 +; AIX-NEXT: xvmindp 0, 34, 35 +; AIX-NEXT: lxvd2x 32, 0, 3 +; AIX-NEXT: ld 3, L..C7(2) # %const.0 +; AIX-NEXT: xxlnor 36, 36, 36 +; AIX-NEXT: lxvd2x 1, 0, 3 +; AIX-NEXT: xxlnor 37, 37, 37 +; AIX-NEXT: vcmpequd 1, 2, 0 +; AIX-NEXT: xxlor 2, 37, 36 +; AIX-NEXT: vcmpequd 4, 3, 0 +; AIX-NEXT: xxsel 0, 0, 1, 2 +; AIX-NEXT: xxlxor 1, 1, 1 +; AIX-NEXT: xxsel 2, 0, 34, 33 +; AIX-NEXT: xvcmpeqdp 34, 0, 1 +; AIX-NEXT: xxsel 1, 2, 35, 36 +; AIX-NEXT: xxsel 34, 0, 1, 34 +; AIX-NEXT: blr +entry: + %m = call <2 x double> @llvm.minimum.v2f64(<2 x double> %a, <2 x double> %b) + ret <2 x double> %m +} + +define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) { +; NOVSX-LABEL: v2f64_maximum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fcmpu 0, 1, 3 +; NOVSX-NEXT: fmr 6, 1 +; NOVSX-NEXT: stfd 4, -16(1) +; NOVSX-NEXT: stfd 2, -8(1) +; NOVSX-NEXT: stfd 3, -32(1) +; NOVSX-NEXT: stfd 1, -24(1) +; NOVSX-NEXT: bc 12, 1, .LBB7_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 6, 3 +; NOVSX-NEXT: .LBB7_2: # %entry +; NOVSX-NEXT: addis 3, 2, .LCPI7_0@toc@ha +; NOVSX-NEXT: lfs 0, .LCPI7_0@toc@l(3) +; NOVSX-NEXT: ld 3, -24(1) +; NOVSX-NEXT: fmr 5, 0 +; NOVSX-NEXT: bc 12, 3, .LBB7_4 +; NOVSX-NEXT: # %bb.3: # %entry +; NOVSX-NEXT: fmr 5, 6 +; NOVSX-NEXT: .LBB7_4: # %entry +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: ld 3, -32(1) +; NOVSX-NEXT: bc 12, 2, .LBB7_6 +; NOVSX-NEXT: # %bb.5: # %entry +; NOVSX-NEXT: fmr 1, 5 +; NOVSX-NEXT: .LBB7_6: # %entry +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: bc 12, 2, .LBB7_8 +; NOVSX-NEXT: # %bb.7: # %entry +; NOVSX-NEXT: fmr 3, 1 +; NOVSX-NEXT: .LBB7_8: # %entry +; NOVSX-NEXT: addis 3, 2, .LCPI7_1@toc@ha +; NOVSX-NEXT: lfs 1, .LCPI7_1@toc@l(3) +; NOVSX-NEXT: fcmpu 0, 5, 1 +; NOVSX-NEXT: bc 12, 2, .LBB7_10 +; NOVSX-NEXT: # %bb.9: # %entry +; NOVSX-NEXT: fmr 3, 5 +; NOVSX-NEXT: .LBB7_10: # %entry +; NOVSX-NEXT: fcmpu 0, 2, 4 +; NOVSX-NEXT: fmr 5, 2 +; NOVSX-NEXT: bc 12, 1, .LBB7_12 +; NOVSX-NEXT: # %bb.11: # %entry +; NOVSX-NEXT: fmr 5, 4 +; NOVSX-NEXT: .LBB7_12: # %entry +; NOVSX-NEXT: ld 3, -8(1) +; NOVSX-NEXT: bc 12, 3, .LBB7_14 +; NOVSX-NEXT: # %bb.13: # 
%entry +; NOVSX-NEXT: fmr 0, 5 +; NOVSX-NEXT: .LBB7_14: # %entry +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: ld 3, -16(1) +; NOVSX-NEXT: bc 4, 2, .LBB7_19 +; NOVSX-NEXT: # %bb.15: # %entry +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: bc 4, 2, .LBB7_20 +; NOVSX-NEXT: .LBB7_16: # %entry +; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: bc 12, 2, .LBB7_18 +; NOVSX-NEXT: .LBB7_17: # %entry +; NOVSX-NEXT: fmr 4, 0 +; NOVSX-NEXT: .LBB7_18: # %entry +; NOVSX-NEXT: fmr 1, 3 +; NOVSX-NEXT: fmr 2, 4 +; NOVSX-NEXT: blr +; NOVSX-NEXT: .LBB7_19: # %entry +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: bc 12, 2, .LBB7_16 +; NOVSX-NEXT: .LBB7_20: # %entry +; NOVSX-NEXT: fmr 4, 2 +; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: bc 4, 2, .LBB7_17 +; NOVSX-NEXT: b .LBB7_18 +; +; VSX-LABEL: v2f64_maximum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: xvcmpeqdp 37, 35, 35 +; VSX-NEXT: addis 3, 2, .LCPI7_0@toc@ha +; VSX-NEXT: xvcmpeqdp 32, 34, 34 +; VSX-NEXT: addi 3, 3, .LCPI7_0@toc@l +; VSX-NEXT: xvmaxdp 0, 34, 35 +; VSX-NEXT: lxvd2x 1, 0, 3 +; VSX-NEXT: xxlxor 36, 36, 36 +; VSX-NEXT: vcmpequd 1, 2, 4 +; VSX-NEXT: xxlnor 37, 37, 37 +; VSX-NEXT: xxlnor 32, 32, 32 +; VSX-NEXT: xxlor 2, 32, 37 +; VSX-NEXT: vcmpequd 5, 3, 4 +; VSX-NEXT: xxsel 0, 0, 1, 2 +; VSX-NEXT: xxsel 1, 0, 34, 33 +; VSX-NEXT: xvcmpeqdp 34, 0, 36 +; VSX-NEXT: xxsel 1, 1, 35, 37 +; VSX-NEXT: xxsel 34, 0, 1, 34 +; VSX-NEXT: blr +; +; AIX-LABEL: v2f64_maximum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: xvcmpeqdp 36, 35, 35 +; AIX-NEXT: ld 3, L..C8(2) # %const.0 +; AIX-NEXT: xvcmpeqdp 37, 34, 34 +; AIX-NEXT: xvmaxdp 0, 34, 35 +; AIX-NEXT: xxlxor 32, 32, 32 +; AIX-NEXT: lxvd2x 1, 0, 3 +; AIX-NEXT: vcmpequd 1, 2, 0 +; AIX-NEXT: xxlnor 36, 36, 36 +; AIX-NEXT: xxlnor 37, 37, 37 +; AIX-NEXT: xxlor 2, 37, 36 +; AIX-NEXT: vcmpequd 4, 3, 0 +; AIX-NEXT: xxsel 0, 0, 1, 2 +; AIX-NEXT: xxsel 1, 0, 34, 33 +; AIX-NEXT: xvcmpeqdp 34, 0, 32 +; AIX-NEXT: xxsel 1, 1, 35, 36 +; AIX-NEXT: xxsel 34, 0, 1, 34 +; AIX-NEXT: blr +entry: + %m = call <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b) + ret <2 x double> %m +} + +declare float @llvm.maximum.f32(float, float) +declare double @llvm.maximum.f64(double, double) +declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>) + +declare float @llvm.minimum.f32(float, float) +declare double @llvm.minimum.f64(double, double) +declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)