Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -976,6 +976,10 @@
     unsigned combineRepeatedFPDivisors() const override;
 
     CCAssignFn *useFastISelCCs(unsigned Flag) const;
+
+    SDValue
+    combineElementTruncationToVectorTruncation(SDNode *N,
+                                               DAGCombinerInfo &DCI) const;
   };
 
   namespace PPC {
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10518,6 +10518,97 @@
                      ShiftCst);
 }
 
+/// \brief Reduces the number of fp-to-int conversions when building a vector.
+///
+/// If this vector is built out of floating to integer conversions,
+/// transform it to a vector built out of floating point values followed by a
+/// single floating to integer conversion of the vector.
+/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
+/// becomes (fptosi (build_vector ($A, $B, ...)))
+///
+/// \param N   BUILD_VECTOR node whose first operand is a PPCISD::MFVSR.
+/// \param DCI Combiner state; its SelectionDAG is used to build new nodes.
+/// \returns The vector conversion node that replaces \p N, or an empty
+/// SDValue when the combine does not apply.
+SDValue PPCTargetLowering::
+combineElementTruncationToVectorTruncation(SDNode *N,
+                                           DAGCombinerInfo &DCI) const {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
+         "Should be called with a BUILD_VECTOR node");
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc dl(N);
+
+  SDValue FirstInput = N->getOperand(0);
+  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
+         "The input operand must be an fp-to-int conversion.");
+
+  // This combine happens after legalization so the fp_to_[su]i nodes are
+  // already converted to PPCISD nodes.
+  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
+  if (FirstConversion == PPCISD::FCTIDZ ||
+      FirstConversion == PPCISD::FCTIDUZ ||
+      FirstConversion == PPCISD::FCTIWZ ||
+      FirstConversion == PPCISD::FCTIWUZ) {
+    bool IsSplat = true;
+    bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
+                   FirstConversion == PPCISD::FCTIWUZ;
+    EVT SrcVT = FirstInput.getOperand(0).getValueType();
+    SmallVector<SDValue, 4> Ops;
+    EVT TargetVT = N->getValueType(0);
+    // Bail out unless every operand is an MFVSR of the same conversion opcode,
+    // and note whether all operands are the identical node (a splat).
+    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
+      if (N->getOperand(i).getOpcode() != PPCISD::MFVSR)
+        return SDValue();
+      unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode();
+      if (NextConversion != FirstConversion)
+        return SDValue();
+      if (N->getOperand(i) != FirstInput)
+        IsSplat = false;
+    }
+
+    // If this is a splat, we leave it as-is since there will be only a single
+    // fp-to-int conversion followed by a splat of the integer. This is better
+    // for 32-bit and smaller ints and neutral for 64-bit ints.
+    if (IsSplat)
+      return SDValue();
+
+    // Now that we know we have the right type of node, get its operands.
+    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
+      SDValue In = N->getOperand(i).getOperand(0);
+      // For 32-bit values, we need to add an FP_ROUND node.
+      // NOTE(review): the final operand (1) presumably flags this FP_ROUND as
+      // exact; confirm that holds when the input is genuinely double precision.
+      if (Is32Bit) {
+        if (In.isUndef())
+          Ops.push_back(DAG.getUNDEF(SrcVT));
+        else {
+          SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
+                                      MVT::f32, In.getOperand(0),
+                                      DAG.getIntPtrConstant(1, dl));
+          Ops.push_back(Trunc);
+        }
+      } else
+        Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
+    }
+
+    // FCTIDZ/FCTIWZ map to FP_TO_SINT; the other two map to FP_TO_UINT.
+    unsigned Opcode;
+    if (FirstConversion == PPCISD::FCTIDZ ||
+        FirstConversion == PPCISD::FCTIWZ)
+      Opcode = ISD::FP_TO_SINT;
+    else
+      Opcode = ISD::FP_TO_UINT;
+
+    // A v2i64 result is converted from v2f64; anything else from v4f32.
+    EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
+    SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
+    return DAG.getNode(Opcode, dl, TargetVT, BV);
+  }
+  return SDValue();
+}
+
 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
@@ -10525,7 +10616,20 @@
 
   SelectionDAG &DAG = DCI.DAG;
   SDLoc dl(N);
-  if (N->getValueType(0) != MVT::v2f64 || !Subtarget.hasVSX())
+
+  if (!Subtarget.hasVSX())
+    return SDValue();
+
+  // The target independent DAG combiner will leave a build_vector of
+  // float-to-int conversions intact. We can generate MUCH better code for
+  // a float-to-int conversion of a vector of floats.
+  SDValue FirstInput = N->getOperand(0);
+  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
+    SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
+    if (Reduced)
+      return Reduced;
+  }
+  if (N->getValueType(0) != MVT::v2f64)
     return SDValue();
 
   // Looking for: