Index: include/llvm/CodeGen/SelectionDAG.h =================================================================== --- include/llvm/CodeGen/SelectionDAG.h +++ include/llvm/CodeGen/SelectionDAG.h @@ -1579,6 +1579,9 @@ return SplitVector(N->getOperand(OpNo), SDLoc(N)); } + /// Widen the vector up to the next power of two using INSERT_SUBVECTOR. + SDValue WidenVector(const SDValue &N, const SDLoc &DL); + /// Append the extracted elements from Start to Count out of the vector Op /// in Args. If Count is 0, all of the elements will be extracted. void ExtractVectorElements(SDValue Op, SmallVectorImpl &Args, Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -827,6 +827,7 @@ SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_SETCC(SDNode* N); + SDValue WidenVecOp_VSELECT(SDNode *N); SDValue WidenVecOp_Convert(SDNode *N); SDValue WidenVecOp_FCOPYSIGN(SDNode *N); Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3653,8 +3653,15 @@ return Res; } - InOp1 = GetWidenedVector(InOp1); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + // If the inputs also widen, handle them directly. Otherwise widen by hand. + SDValue InOp2 = N->getOperand(1); + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { + InOp1 = GetWidenedVector(InOp1); + InOp2 = GetWidenedVector(InOp2); + } else { + InOp1 = DAG.WidenVector(InOp1, SDLoc(N)); + InOp2 = DAG.WidenVector(InOp2, SDLoc(N)); + } // Assume that the input and output will be widen appropriately. If not, // we will have to unroll it at some point. @@ -3697,6 +3704,7 @@ case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break; case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; + case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break; case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break; case ISD::ANY_EXTEND: @@ -4061,6 +4069,24 @@ return PromoteTargetBoolean(CC, VT); } +SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) { + // This only gets called in the case that the left and right inputs and + // result are of a legal odd vector type, and the condition is illegal i1 of + // the same odd width that needs widening. + EVT VT = N->getValueType(0); + assert(VT.isVector() && !VT.isPow2VectorType() && isTypeLegal(VT)); + + SDValue Cond = GetWidenedVector(N->getOperand(0)); + SDValue LeftIn = DAG.WidenVector(N->getOperand(1), SDLoc(N)); + SDValue RightIn = DAG.WidenVector(N->getOperand(2), SDLoc(N)); + SDLoc DL(N); + + SDValue Select = DAG.getNode(N->getOpcode(), DL, LeftIn.getValueType(), Cond, + LeftIn, RightIn); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, VT, Select, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); +} //===----------------------------------------------------------------------===// // Vector Widening Utilities Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -9066,6 +9066,15 @@ return std::make_pair(Lo, Hi); } +/// Widen the vector up to the next power of two using INSERT_SUBVECTOR. +SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) { + EVT VT = N.getValueType(); + EVT WideVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(), + NextPowerOf2(VT.getVectorNumElements())); + return getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, getUNDEF(WideVT), N, + getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout()))); +} + void SelectionDAG::ExtractVectorElements(SDValue Op, SmallVectorImpl &Args, unsigned Start, unsigned Count) { Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -740,8 +740,10 @@ TargetLoweringBase::LegalizeKind TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { - // If this is a simple type, use the ComputeRegisterProp mechanism. - if (VT.isSimple()) { + // If this is a simple type, excluding a non-legal non-power-of-two vector, + // use the ComputeRegisterProp mechanism. + if (VT.isSimple() && + (!VT.isVector() || VT.isPow2VectorType() || isTypeLegal(VT))) { MVT SVT = VT.getSimpleVT(); assert((unsigned)SVT.SimpleTy < array_lengthof(TransformToType)); MVT NVT = TransformToType[SVT.SimpleTy]; Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1028,11 +1028,12 @@ if (MemVT.isVector() && MemVT.getVectorNumElements() == 1) MemVT = MemVT.getScalarType(); - if (MemVT.isExtended()) { - // This should really only happen if we have vec3 arguments - assert(MemVT.isVector() && MemVT.getVectorNumElements() == 3); + // Round up vec3 argument. + if (MemVT.isVector() && MemVT.getVectorNumElements() == 3) MemVT = MemVT.getPow2VectorType(State.getContext()); - } + else + assert(!MemVT.isExtended() && + (!MemVT.isVector() || MemVT.isPow2VectorType())); unsigned PartOffset = 0; for (unsigned i = 0; i != NumRegs; ++i) { Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -340,7 +340,7 @@ TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args ) { EVT OrigTy = TLI->getValueType(DL, Ty); - if (!OrigTy.isSimple()) { + if (!OrigTy.isSimple() || (OrigTy.isVector() && !OrigTy.isPow2VectorType())) { return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); } Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -12150,7 +12150,7 @@ MVT IntTy = N->getSimpleValueType(0).getVectorElementType(); uint32_t IntBits = IntTy.getSizeInBits(); unsigned NumLanes = Op.getValueType().getVectorNumElements(); - if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) { + if (FloatBits != 32 || IntBits > 32 || NumLanes > 4 || NumLanes == 3) { // These instructions only exist converting from f32 to i32. We can handle // smaller integers by generating an extra truncate, but larger ones would // be lossy. We also can't handle more then 4 lanes, since these intructions @@ -12208,7 +12208,7 @@ MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType(); uint32_t IntBits = IntTy.getSizeInBits(); unsigned NumLanes = Op.getValueType().getVectorNumElements(); - if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) { + if (FloatBits != 32 || IntBits > 32 || NumLanes > 4 || NumLanes == 3) { // These instructions only exist converting from i32 to f32. We can handle // smaller integers by generating an extra extend, but larger ones would // be lossy. We also can't handle more then 4 lanes, since these intructions