Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8601,6 +8601,11 @@
-  if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
-    return SDValue();
+  if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
+    if (NumVecElts != 4)
+      return SDValue();
+  } else {
+    if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
+      return SDValue();
+  }
 
   int NumExpectedSteps = APInt(8, NumVecElts).logBase2();
   SDValue PreOp = OpV;
   // Iterate over each step of the across vector reduction.
@@ -8654,25 +8659,30 @@
   default:
     llvm_unreachable("Unexpected operator for across vector reduction");
   case ISD::ADD:
-    Opcode = AArch64ISD::UADDV;
+    Opcode = Intrinsic::aarch64_neon_uaddv;
     break;
   case ISD::SMAX:
-    Opcode = AArch64ISD::SMAXV;
+    Opcode = Intrinsic::aarch64_neon_smaxv;
     break;
   case ISD::UMAX:
-    Opcode = AArch64ISD::UMAXV;
+    Opcode = Intrinsic::aarch64_neon_umaxv;
     break;
   case ISD::SMIN:
-    Opcode = AArch64ISD::SMINV;
+    Opcode = Intrinsic::aarch64_neon_sminv;
     break;
   case ISD::UMIN:
-    Opcode = AArch64ISD::UMINV;
+    Opcode = Intrinsic::aarch64_neon_uminv;
+    break;
+  case ISD::FMAXNUM:
+    Opcode = Intrinsic::aarch64_neon_fmaxnmv;
+    break;
+  case ISD::FMINNUM:
+    Opcode = Intrinsic::aarch64_neon_fminnmv;
     break;
   }
   SDLoc DL(N);
-  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
-                     DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp),
-                     DAG.getConstant(0, DL, MVT::i64));
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
+                     DAG.getConstant(Opcode, DL, MVT::i32), PreOp);
 }
 
 /// Target-specific DAG combine for the across vector min/max reductions.
@@ -8696,9 +8706,6 @@
 /// becomes :
 ///   %1 = smaxv %0
 ///   %result = extract_vector_elt %1, 0
-/// FIXME: Currently this function matches only SMAXV, UMAXV, SMINV, and UMINV.
-/// We could also support other types of across lane reduction available
-/// in AArch64, including FMAXNMV, FMAXV, FMINNMV, and FMINV.
 static SDValue
 performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
                                         const AArch64Subtarget *Subtarget) {
@@ -8726,7 +8733,8 @@
   SDValue VectorOp = SetCC.getOperand(0);
   unsigned Op = VectorOp->getOpcode();
   // Check if the input vector is fed by the operator we want to handle.
-  if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN && Op != ISD::UMIN)
+  if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN &&
+      Op != ISD::UMIN && Op != ISD::FMAXNUM && Op != ISD::FMINNUM)
     return SDValue();
 
   EVT VTy = VectorOp.getValueType();
@@ -8734,8 +8742,13 @@
     return SDValue();
 
   EVT EltTy = VTy.getVectorElementType();
-  if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
-    return SDValue();
+  if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
+    if (EltTy != MVT::f32)
+      return SDValue();
+  } else {
+    if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
+      return SDValue();
+  }
 
   // Check if extracting from the same vector.
   // For example,
@@ -8752,7 +8765,13 @@
   if ((Op == ISD::SMAX && CC != ISD::SETGT && CC != ISD::SETGE) ||
       (Op == ISD::UMAX && CC != ISD::SETUGT && CC != ISD::SETUGE) ||
       (Op == ISD::SMIN && CC != ISD::SETLT && CC != ISD::SETLE) ||
-      (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE))
+      (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE) ||
+      (Op == ISD::FMAXNUM && CC != ISD::SETOGT && CC != ISD::SETOGE &&
+       CC != ISD::SETUGT && CC != ISD::SETUGE && CC != ISD::SETGT &&
+       CC != ISD::SETGE) ||
+      (Op == ISD::FMINNUM && CC != ISD::SETOLT && CC != ISD::SETOLE &&
+       CC != ISD::SETULT && CC != ISD::SETULE && CC != ISD::SETLT &&
+       CC != ISD::SETLE))
     return SDValue();
 
   // Expect to check only lane 0 from the vector SETCC.
Index: test/CodeGen/AArch64/aarch64-minmaxv.ll
===================================================================
--- test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -285,3 +285,35 @@
   %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt
   ret i64 %r
 }
+
+; CHECK-LABEL: f_fmaxnmv
+; CHECK: fmaxnmv
+define float @f_fmaxnmv(<4 x float>* nocapture readonly %arr) {
+  %rdx.minmax.select = load <4 x float>, <4 x float>* %arr
+  %rdx.shuf = shufflevector <4 x float> %rdx.minmax.select, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %rdx.minmax.cmp = fcmp fast oge <4 x float> %rdx.minmax.select, %rdx.shuf
+  %rdx.minmax.select1 = select <4 x i1> %rdx.minmax.cmp, <4 x float> %rdx.minmax.select, <4 x float> %rdx.shuf
+  %rdx.shuf1 = shufflevector <4 x float> %rdx.minmax.select1, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %rdx.minmax.cmp1 = fcmp fast oge <4 x float> %rdx.minmax.select1, %rdx.shuf1
+  %rdx.minmax.cmp1.elt = extractelement <4 x i1> %rdx.minmax.cmp1, i32 0
+  %rdx.minmax.select1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 0
+  %rdx.shuf1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 1
+  %r = select i1 %rdx.minmax.cmp1.elt, float %rdx.minmax.select1.elt, float %rdx.shuf1.elt
+  ret float %r
+}
+
+; CHECK-LABEL: f_fminnmv
+; CHECK: fminnmv
+define float @f_fminnmv(<4 x float>* nocapture readonly %arr) {
+  %rdx.minmax.select = load <4 x float>, <4 x float>* %arr
+  %rdx.shuf = shufflevector <4 x float> %rdx.minmax.select, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %rdx.minmax.cmp = fcmp fast ole <4 x float> %rdx.minmax.select, %rdx.shuf
+  %rdx.minmax.select1 = select <4 x i1> %rdx.minmax.cmp, <4 x float> %rdx.minmax.select, <4 x float> %rdx.shuf
+  %rdx.shuf1 = shufflevector <4 x float> %rdx.minmax.select1, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %rdx.minmax.cmp1 = fcmp fast ole <4 x float> %rdx.minmax.select1, %rdx.shuf1
+  %rdx.minmax.cmp1.elt = extractelement <4 x i1> %rdx.minmax.cmp1, i32 0
+  %rdx.minmax.select1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 0
+  %rdx.shuf1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 1
+  %r = select i1 %rdx.minmax.cmp1.elt, float %rdx.minmax.select1.elt, float %rdx.shuf1.elt
+  ret float %r
+}