diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1404,6 +1404,7 @@ SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, SelectionDAG &DAG) const; SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const; SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -412,6 +412,11 @@ setOperationAction(ISD::FMA , MVT::f32, Legal); } + setOperationAction(ISD::FMAXIMUM, MVT::f32, Custom); + setOperationAction(ISD::FMINIMUM, MVT::f32, Custom); + setOperationAction(ISD::FMAXIMUM, MVT::f64, Custom); + setOperationAction(ISD::FMINIMUM, MVT::f64, Custom); + if (Subtarget.hasSPE()) setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); @@ -784,6 +789,8 @@ if (Subtarget.hasVSX()) { setOperationAction(ISD::FMAXNUM, VT, Legal); setOperationAction(ISD::FMINNUM, VT, Legal); + setOperationAction(ISD::FMAXIMUM, VT, Custom); + setOperationAction(ISD::FMINIMUM, VT, Custom); } // Vector instructions introduced in P8 @@ -1221,6 +1228,9 @@ setOperationAction(ISD::FP_ROUND, MVT::f32, Legal); setOperationAction(ISD::BITCAST, MVT::i128, Custom); + setOperationAction(ISD::FMAXIMUM, MVT::f128, Custom); + setOperationAction(ISD::FMINIMUM, MVT::f128, Custom); + // Handle constrained floating-point operations of fp128 setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal); @@ -11627,6 +11637,9 @@ return LowerATOMIC_LOAD_STORE(Op, DAG); case ISD::IS_FPCLASS: return LowerIS_FPCLASS(Op, DAG); + case ISD::FMINIMUM: + case ISD::FMAXIMUM: + return LowerFMINIMUM_FMAXIMUM(Op, DAG); } } @@ -18201,6 +18214,39 @@ return false; } +SDValue +PPCTargetLowering::LowerFMINIMUM_FMAXIMUM(llvm::SDValue Op, + llvm::SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + unsigned Opc = Op.getOpcode(); + EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + Op.getValueType()); + bool NoNaN = (Op->getFlags().hasNoNaNs() || + (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))); + + // Use two-way comparison to propagate NaN. + SDValue NaNCheck = DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO); + SDValue MinMax; + if (Subtarget.hasVSX() && Op.getValueType() != MVT::f128) { + MinMax = DAG.getNode(Opc == ISD::FMAXIMUM ? ISD::FMAXNUM : ISD::FMINNUM, DL, + Op.getValueType(), LHS, RHS); + } else { + SDValue FPCmp = DAG.getSetCC( + DL, CCVT, LHS, RHS, Opc == ISD::FMAXIMUM ? ISD::SETOGT : ISD::SETOLT); + MinMax = DAG.getSelect(DL, Op.getValueType(), FPCmp, LHS, RHS); + } + if (NoNaN) + return MinMax; + ConstantFP *FPNaN = ConstantFP::get( + *DAG.getContext(), + APFloat::getNaN(DAG.EVTToAPFloatSemantics(Op.getValueType()))); + SDValue NaN = DAG.getConstantFP(*FPNaN, DL, Op.getValueType()); + SDValue Result = DAG.getSelect(DL, Op.getValueType(), NaNCheck, NaN, MinMax); + return Result; +} + SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op, SelectionDAG &DAG) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s + +define fp128 @f128_minimum(fp128 %a, fp128 %b) { +; CHECK-LABEL: f128_minimum: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscmpuqp 0, 2, 3 +; CHECK-NEXT: blt 0, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: vmr 2, 3 +; CHECK-NEXT: .LBB0_2: # %entry +; CHECK-NEXT: bnulr 0 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK-NEXT: lxv 34, 0(3) +; CHECK-NEXT: blr +entry: + %m = call fp128 @llvm.minimum.f128(fp128 %a, fp128 %b) + ret fp128 %m +} + +define fp128 @f128_maximum(fp128 %a, fp128 %b) { +; CHECK-LABEL: f128_maximum: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscmpuqp 0, 2, 3 +; CHECK-NEXT: bgt 0, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: vmr 2, 3 +; CHECK-NEXT: .LBB1_2: # %entry +; CHECK-NEXT: bnulr 0 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: addis 3, 2, .LCPI1_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI1_0@toc@l +; CHECK-NEXT: lxv 34, 0(3) +; CHECK-NEXT: blr +entry: + %m = call fp128 @llvm.maximum.f128(fp128 %a, fp128 %b) + ret fp128 %m +} + +declare fp128 @llvm.minimum.f128(fp128, fp128) +declare fp128 @llvm.maximum.f128(fp128, fp128) diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll @@ -0,0 +1,500 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s --check-prefix=NOVSX +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s --check-prefix=VSX +; RUN: llc -mtriple=powerpc64-ibm-aix -mcpu=pwr8 < %s | FileCheck %s --check-prefix=AIX + +define float @f32_minimum(float %a, float %b) { +; NOVSX-LABEL: f32_minimum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: bc 12, 0, .LBB0_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: .LBB0_2: # %entry +; NOVSX-NEXT: bclr 4, 3, 0 +; NOVSX-NEXT: # %bb.3: +; NOVSX-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; NOVSX-NEXT: lfs 1, .LCPI0_0@toc@l(3) +; NOVSX-NEXT: blr +; +; VSX-LABEL: f32_minimum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: fcmpu 0, 1, 2 +; VSX-NEXT: bc 12, 3, .LBB0_2 +; VSX-NEXT: # %bb.1: # %entry +; VSX-NEXT: xsmindp 1, 1, 2 +; VSX-NEXT: blr +; VSX-NEXT: .LBB0_2: +; VSX-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; VSX-NEXT: lfs 1, .LCPI0_0@toc@l(3) +; VSX-NEXT: blr +; +; AIX-LABEL: f32_minimum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: fcmpu 0, 1, 2 +; AIX-NEXT: bc 12, 3, L..BB0_2 +; AIX-NEXT: # %bb.1: # %entry +; AIX-NEXT: xsmindp 1, 1, 2 +; AIX-NEXT: blr +; AIX-NEXT: L..BB0_2: +; AIX-NEXT: ld 3, L..C0(2) # %const.0 +; AIX-NEXT: lfs 1, 0(3) +; AIX-NEXT: blr +entry: + %m = call float @llvm.minimum.f32(float %a, float %b) + ret float %m +} + +define float @f32_maximum(float %a, float %b) { +; NOVSX-LABEL: f32_maximum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: bc 12, 1, .LBB1_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: .LBB1_2: # %entry +; NOVSX-NEXT: bclr 4, 3, 0 +; NOVSX-NEXT: # %bb.3: +; NOVSX-NEXT: addis 3, 2, .LCPI1_0@toc@ha +; NOVSX-NEXT: lfs 1, .LCPI1_0@toc@l(3) +; NOVSX-NEXT: blr +; +; VSX-LABEL: f32_maximum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: fcmpu 0, 1, 2 +; VSX-NEXT: bc 12, 3, .LBB1_2 +; VSX-NEXT: # %bb.1: # %entry +; VSX-NEXT: xsmaxdp 1, 1, 2 +; VSX-NEXT: blr +; VSX-NEXT: .LBB1_2: +; VSX-NEXT: addis 3, 2, .LCPI1_0@toc@ha +; VSX-NEXT: lfs 1, .LCPI1_0@toc@l(3) +; VSX-NEXT: blr +; +; AIX-LABEL: f32_maximum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: fcmpu 0, 1, 2 +; AIX-NEXT: bc 12, 3, L..BB1_2 +; AIX-NEXT: # %bb.1: # %entry +; AIX-NEXT: xsmaxdp 1, 1, 2 +; AIX-NEXT: blr +; AIX-NEXT: L..BB1_2: +; AIX-NEXT: ld 3, L..C1(2) # %const.0 +; AIX-NEXT: lfs 1, 0(3) +; AIX-NEXT: blr +entry: + %m = call float @llvm.maximum.f32(float %a, float %b) + ret float %m +} + +define double @f64_minimum(double %a, double %b) { +; NOVSX-LABEL: f64_minimum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: bc 12, 0, .LBB2_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: .LBB2_2: # %entry +; NOVSX-NEXT: bclr 4, 3, 0 +; NOVSX-NEXT: # %bb.3: +; NOVSX-NEXT: addis 3, 2, .LCPI2_0@toc@ha +; NOVSX-NEXT: lfs 1, .LCPI2_0@toc@l(3) +; NOVSX-NEXT: blr +; +; VSX-LABEL: f64_minimum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: fcmpu 0, 1, 2 +; VSX-NEXT: bc 12, 3, .LBB2_2 +; VSX-NEXT: # %bb.1: # %entry +; VSX-NEXT: xsmindp 1, 1, 2 +; VSX-NEXT: blr +; VSX-NEXT: .LBB2_2: +; VSX-NEXT: addis 3, 2, .LCPI2_0@toc@ha +; VSX-NEXT: lfs 1, .LCPI2_0@toc@l(3) +; VSX-NEXT: blr +; +; AIX-LABEL: f64_minimum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: fcmpu 0, 1, 2 +; AIX-NEXT: bc 12, 3, L..BB2_2 +; AIX-NEXT: # %bb.1: # %entry +; AIX-NEXT: xsmindp 1, 1, 2 +; AIX-NEXT: blr +; AIX-NEXT: L..BB2_2: +; AIX-NEXT: ld 3, L..C2(2) # %const.0 +; AIX-NEXT: lfs 1, 0(3) +; AIX-NEXT: blr +entry: + %m = call double @llvm.minimum.f64(double %a, double %b) + ret double %m +} + +define double @f64_maximum(double %a, double %b) { +; NOVSX-LABEL: f64_maximum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: bc 12, 1, .LBB3_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: .LBB3_2: # %entry +; NOVSX-NEXT: bclr 4, 3, 0 +; NOVSX-NEXT: # %bb.3: +; NOVSX-NEXT: addis 3, 2, .LCPI3_0@toc@ha +; NOVSX-NEXT: lfs 1, .LCPI3_0@toc@l(3) +; NOVSX-NEXT: blr +; +; VSX-LABEL: f64_maximum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: fcmpu 0, 1, 2 +; VSX-NEXT: bc 12, 3, .LBB3_2 +; VSX-NEXT: # %bb.1: # %entry +; VSX-NEXT: xsmaxdp 1, 1, 2 +; VSX-NEXT: blr +; VSX-NEXT: .LBB3_2: +; VSX-NEXT: addis 3, 2, .LCPI3_0@toc@ha +; VSX-NEXT: lfs 1, .LCPI3_0@toc@l(3) +; VSX-NEXT: blr +; +; AIX-LABEL: f64_maximum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: fcmpu 0, 1, 2 +; AIX-NEXT: bc 12, 3, L..BB3_2 +; AIX-NEXT: # %bb.1: # %entry +; AIX-NEXT: xsmaxdp 1, 1, 2 +; AIX-NEXT: blr +; AIX-NEXT: L..BB3_2: +; AIX-NEXT: ld 3, L..C3(2) # %const.0 +; AIX-NEXT: lfs 1, 0(3) +; AIX-NEXT: blr +entry: + %m = call double @llvm.maximum.f64(double %a, double %b) + ret double %m +} + +define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) { +; NOVSX-LABEL: v4f32_minimum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: addi 3, 1, -32 +; NOVSX-NEXT: addi 4, 1, -48 +; NOVSX-NEXT: stvx 3, 0, 3 +; NOVSX-NEXT: stvx 2, 0, 4 +; NOVSX-NEXT: lfs 0, -20(1) +; NOVSX-NEXT: lfs 1, -36(1) +; NOVSX-NEXT: fcmpu 0, 1, 0 +; NOVSX-NEXT: bc 12, 0, .LBB4_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: .LBB4_2: # %entry +; NOVSX-NEXT: addis 3, 2, .LCPI4_0@toc@ha +; NOVSX-NEXT: lfs 0, .LCPI4_0@toc@l(3) +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: bc 12, 3, .LBB4_4 +; NOVSX-NEXT: # %bb.3: # %entry +; NOVSX-NEXT: fmr 2, 1 +; NOVSX-NEXT: .LBB4_4: # %entry +; NOVSX-NEXT: stfs 2, -4(1) +; NOVSX-NEXT: lfs 2, -24(1) +; NOVSX-NEXT: lfs 1, -40(1) +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: bc 12, 0, .LBB4_6 +; NOVSX-NEXT: # %bb.5: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: .LBB4_6: # %entry +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: bc 12, 3, .LBB4_8 +; NOVSX-NEXT: # %bb.7: # %entry +; NOVSX-NEXT: fmr 2, 1 +; NOVSX-NEXT: .LBB4_8: # %entry +; NOVSX-NEXT: stfs 2, -8(1) +; NOVSX-NEXT: lfs 2, -28(1) +; NOVSX-NEXT: lfs 1, -44(1) +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: bc 12, 0, .LBB4_10 +; NOVSX-NEXT: # %bb.9: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: .LBB4_10: # %entry +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: bc 12, 3, .LBB4_12 +; NOVSX-NEXT: # %bb.11: # %entry +; NOVSX-NEXT: fmr 2, 1 +; NOVSX-NEXT: .LBB4_12: # %entry +; NOVSX-NEXT: stfs 2, -12(1) +; NOVSX-NEXT: lfs 2, -32(1) +; NOVSX-NEXT: lfs 1, -48(1) +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: bc 12, 0, .LBB4_14 +; NOVSX-NEXT: # %bb.13: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: .LBB4_14: # %entry +; NOVSX-NEXT: bc 12, 3, .LBB4_16 +; NOVSX-NEXT: # %bb.15: # %entry +; NOVSX-NEXT: fmr 0, 1 +; NOVSX-NEXT: .LBB4_16: # %entry +; NOVSX-NEXT: stfs 0, -16(1) +; NOVSX-NEXT: addi 3, 1, -16 +; NOVSX-NEXT: lvx 2, 0, 3 +; NOVSX-NEXT: blr +; +; VSX-LABEL: v4f32_minimum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: xvcmpeqsp 0, 35, 35 +; VSX-NEXT: addis 3, 2, .LCPI4_0@toc@ha +; VSX-NEXT: xvcmpeqsp 1, 34, 34 +; VSX-NEXT: addi 3, 3, .LCPI4_0@toc@l +; VSX-NEXT: xvminsp 2, 34, 35 +; VSX-NEXT: lxvd2x 3, 0, 3 +; VSX-NEXT: xxlnor 0, 0, 0 +; VSX-NEXT: xxlnor 1, 1, 1 +; VSX-NEXT: xxlor 0, 1, 0 +; VSX-NEXT: xxsel 34, 2, 3, 0 +; VSX-NEXT: blr +; +; AIX-LABEL: v4f32_minimum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: xvcmpeqsp 0, 35, 35 +; AIX-NEXT: ld 3, L..C4(2) # %const.0 +; AIX-NEXT: xvcmpeqsp 1, 34, 34 +; AIX-NEXT: xvminsp 2, 34, 35 +; AIX-NEXT: lxvw4x 3, 0, 3 +; AIX-NEXT: xxlnor 0, 0, 0 +; AIX-NEXT: xxlnor 1, 1, 1 +; AIX-NEXT: xxlor 0, 1, 0 +; AIX-NEXT: xxsel 34, 2, 3, 0 +; AIX-NEXT: blr +entry: + %m = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %m +} + +define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) { +; NOVSX-LABEL: v4f32_maximum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: addi 3, 1, -32 +; NOVSX-NEXT: addi 4, 1, -48 +; NOVSX-NEXT: stvx 3, 0, 3 +; NOVSX-NEXT: stvx 2, 0, 4 +; NOVSX-NEXT: lfs 0, -20(1) +; NOVSX-NEXT: lfs 1, -36(1) +; NOVSX-NEXT: fcmpu 0, 1, 0 +; NOVSX-NEXT: bc 12, 1, .LBB5_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: .LBB5_2: # %entry +; NOVSX-NEXT: addis 3, 2, .LCPI5_0@toc@ha +; NOVSX-NEXT: lfs 0, .LCPI5_0@toc@l(3) +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: bc 12, 3, .LBB5_4 +; NOVSX-NEXT: # %bb.3: # %entry +; NOVSX-NEXT: fmr 2, 1 +; NOVSX-NEXT: .LBB5_4: # %entry +; NOVSX-NEXT: stfs 2, -4(1) +; NOVSX-NEXT: lfs 2, -24(1) +; NOVSX-NEXT: lfs 1, -40(1) +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: bc 12, 1, .LBB5_6 +; NOVSX-NEXT: # %bb.5: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: .LBB5_6: # %entry +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: bc 12, 3, .LBB5_8 +; NOVSX-NEXT: # %bb.7: # %entry +; NOVSX-NEXT: fmr 2, 1 +; NOVSX-NEXT: .LBB5_8: # %entry +; NOVSX-NEXT: stfs 2, -8(1) +; NOVSX-NEXT: lfs 2, -28(1) +; NOVSX-NEXT: lfs 1, -44(1) +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: bc 12, 1, .LBB5_10 +; NOVSX-NEXT: # %bb.9: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: .LBB5_10: # %entry +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: bc 12, 3, .LBB5_12 +; NOVSX-NEXT: # %bb.11: # %entry +; NOVSX-NEXT: fmr 2, 1 +; NOVSX-NEXT: .LBB5_12: # %entry +; NOVSX-NEXT: stfs 2, -12(1) +; NOVSX-NEXT: lfs 2, -32(1) +; NOVSX-NEXT: lfs 1, -48(1) +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: bc 12, 1, .LBB5_14 +; NOVSX-NEXT: # %bb.13: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: .LBB5_14: # %entry +; NOVSX-NEXT: bc 12, 3, .LBB5_16 +; NOVSX-NEXT: # %bb.15: # %entry +; NOVSX-NEXT: fmr 0, 1 +; NOVSX-NEXT: .LBB5_16: # %entry +; NOVSX-NEXT: stfs 0, -16(1) +; NOVSX-NEXT: addi 3, 1, -16 +; NOVSX-NEXT: lvx 2, 0, 3 +; NOVSX-NEXT: blr +; +; VSX-LABEL: v4f32_maximum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: xvcmpeqsp 0, 35, 35 +; VSX-NEXT: addis 3, 2, .LCPI5_0@toc@ha +; VSX-NEXT: xvcmpeqsp 1, 34, 34 +; VSX-NEXT: addi 3, 3, .LCPI5_0@toc@l +; VSX-NEXT: xvmaxsp 2, 34, 35 +; VSX-NEXT: lxvd2x 3, 0, 3 +; VSX-NEXT: xxlnor 0, 0, 0 +; VSX-NEXT: xxlnor 1, 1, 1 +; VSX-NEXT: xxlor 0, 1, 0 +; VSX-NEXT: xxsel 34, 2, 3, 0 +; VSX-NEXT: blr +; +; AIX-LABEL: v4f32_maximum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: xvcmpeqsp 0, 35, 35 +; AIX-NEXT: ld 3, L..C5(2) # %const.0 +; AIX-NEXT: xvcmpeqsp 1, 34, 34 +; AIX-NEXT: xvmaxsp 2, 34, 35 +; AIX-NEXT: lxvw4x 3, 0, 3 +; AIX-NEXT: xxlnor 0, 0, 0 +; AIX-NEXT: xxlnor 1, 1, 1 +; AIX-NEXT: xxlor 0, 1, 0 +; AIX-NEXT: xxsel 34, 2, 3, 0 +; AIX-NEXT: blr +entry: + %m = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %m +} + +define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) { +; NOVSX-LABEL: v2f64_minimum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fcmpu 0, 1, 3 +; NOVSX-NEXT: fmr 5, 1 +; NOVSX-NEXT: bc 12, 0, .LBB6_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 5, 3 +; NOVSX-NEXT: .LBB6_2: # %entry +; NOVSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha +; NOVSX-NEXT: lfs 0, .LCPI6_0@toc@l(3) +; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: bc 4, 3, .LBB6_6 +; NOVSX-NEXT: # %bb.3: # %entry +; NOVSX-NEXT: fcmpu 0, 2, 4 +; NOVSX-NEXT: bc 4, 0, .LBB6_7 +; NOVSX-NEXT: .LBB6_4: # %entry +; NOVSX-NEXT: bc 4, 3, .LBB6_8 +; NOVSX-NEXT: .LBB6_5: # %entry +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: blr +; NOVSX-NEXT: .LBB6_6: # %entry +; NOVSX-NEXT: fmr 1, 5 +; NOVSX-NEXT: fcmpu 0, 2, 4 +; NOVSX-NEXT: bc 12, 0, .LBB6_4 +; NOVSX-NEXT: .LBB6_7: # %entry +; NOVSX-NEXT: fmr 2, 4 +; NOVSX-NEXT: bc 12, 3, .LBB6_5 +; NOVSX-NEXT: .LBB6_8: # %entry +; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: blr +; +; VSX-LABEL: v2f64_minimum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: xvcmpeqdp 36, 35, 35 +; VSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha +; VSX-NEXT: xvcmpeqdp 37, 34, 34 +; VSX-NEXT: addi 3, 3, .LCPI6_0@toc@l +; VSX-NEXT: xvmindp 0, 34, 35 +; VSX-NEXT: lxvd2x 1, 0, 3 +; VSX-NEXT: xxlnor 34, 36, 36 +; VSX-NEXT: xxlnor 35, 37, 37 +; VSX-NEXT: xxlor 2, 35, 34 +; VSX-NEXT: xxsel 34, 0, 1, 2 +; VSX-NEXT: blr +; +; AIX-LABEL: v2f64_minimum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: xvcmpeqdp 36, 35, 35 +; AIX-NEXT: ld 3, L..C6(2) # %const.0 +; AIX-NEXT: xvcmpeqdp 37, 34, 34 +; AIX-NEXT: xvmindp 0, 34, 35 +; AIX-NEXT: lxvd2x 1, 0, 3 +; AIX-NEXT: xxlnor 34, 36, 36 +; AIX-NEXT: xxlnor 35, 37, 37 +; AIX-NEXT: xxlor 2, 35, 34 +; AIX-NEXT: xxsel 34, 0, 1, 2 +; AIX-NEXT: blr +entry: + %m = call <2 x double> @llvm.minimum.v2f64(<2 x double> %a, <2 x double> %b) + ret <2 x double> %m +} + +define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) { +; NOVSX-LABEL: v2f64_maximum: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fcmpu 0, 1, 3 +; NOVSX-NEXT: fmr 5, 1 +; NOVSX-NEXT: bc 12, 1, .LBB7_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 5, 3 +; NOVSX-NEXT: .LBB7_2: # %entry +; NOVSX-NEXT: addis 3, 2, .LCPI7_0@toc@ha +; NOVSX-NEXT: lfs 0, .LCPI7_0@toc@l(3) +; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: bc 4, 3, .LBB7_6 +; NOVSX-NEXT: # %bb.3: # %entry +; NOVSX-NEXT: fcmpu 0, 2, 4 +; NOVSX-NEXT: bc 4, 1, .LBB7_7 +; NOVSX-NEXT: .LBB7_4: # %entry +; NOVSX-NEXT: bc 4, 3, .LBB7_8 +; NOVSX-NEXT: .LBB7_5: # %entry +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: blr +; NOVSX-NEXT: .LBB7_6: # %entry +; NOVSX-NEXT: fmr 1, 5 +; NOVSX-NEXT: fcmpu 0, 2, 4 +; NOVSX-NEXT: bc 12, 1, .LBB7_4 +; NOVSX-NEXT: .LBB7_7: # %entry +; NOVSX-NEXT: fmr 2, 4 +; NOVSX-NEXT: bc 12, 3, .LBB7_5 +; NOVSX-NEXT: .LBB7_8: # %entry +; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: blr +; +; VSX-LABEL: v2f64_maximum: +; VSX: # %bb.0: # %entry +; VSX-NEXT: xvcmpeqdp 36, 35, 35 +; VSX-NEXT: addis 3, 2, .LCPI7_0@toc@ha +; VSX-NEXT: xvcmpeqdp 37, 34, 34 +; VSX-NEXT: addi 3, 3, .LCPI7_0@toc@l +; VSX-NEXT: xvmaxdp 0, 34, 35 +; VSX-NEXT: lxvd2x 1, 0, 3 +; VSX-NEXT: xxlnor 34, 36, 36 +; VSX-NEXT: xxlnor 35, 37, 37 +; VSX-NEXT: xxlor 2, 35, 34 +; VSX-NEXT: xxsel 34, 0, 1, 2 +; VSX-NEXT: blr +; +; AIX-LABEL: v2f64_maximum: +; AIX: # %bb.0: # %entry +; AIX-NEXT: xvcmpeqdp 36, 35, 35 +; AIX-NEXT: ld 3, L..C7(2) # %const.0 +; AIX-NEXT: xvcmpeqdp 37, 34, 34 +; AIX-NEXT: xvmaxdp 0, 34, 35 +; AIX-NEXT: lxvd2x 1, 0, 3 +; AIX-NEXT: xxlnor 34, 36, 36 +; AIX-NEXT: xxlnor 35, 37, 37 +; AIX-NEXT: xxlor 2, 35, 34 +; AIX-NEXT: xxsel 34, 0, 1, 2 +; AIX-NEXT: blr +entry: + %m = call <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b) + ret <2 x double> %m +} + +declare float @llvm.maximum.f32(float, float) +declare double @llvm.maximum.f64(double, double) +declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>) + +declare float @llvm.minimum.f32(float, float) +declare double @llvm.minimum.f64(double, double) +declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)