Index: llvm/trunk/include/llvm/IR/DerivedTypes.h =================================================================== --- llvm/trunk/include/llvm/IR/DerivedTypes.h +++ llvm/trunk/include/llvm/IR/DerivedTypes.h @@ -475,16 +475,42 @@ return VectorType::get(EltTy, VTy->getElementCount()); } - /// This static method is like getInteger except that the element types are - /// half as wide as the elements in the input type. + // This static method gets a VectorType with the same number of elements as + // the input type, and the element type is an integer or float type which + // is half as wide as the elements in the input type. static VectorType *getTruncatedElementVectorType(VectorType *VTy) { - unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); - assert((EltBits & 1) == 0 && - "Cannot truncate vector element with odd bit-width"); - Type *EltTy = IntegerType::get(VTy->getContext(), EltBits / 2); + Type *EltTy; + if (VTy->getElementType()->isFloatingPointTy()) { + switch(VTy->getElementType()->getTypeID()) { + case DoubleTyID: + EltTy = Type::getFloatTy(VTy->getContext()); + break; + case FloatTyID: + EltTy = Type::getHalfTy(VTy->getContext()); + break; + default: + llvm_unreachable("Cannot create narrower fp vector element type"); + } + } else { + unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); + assert((EltBits & 1) == 0 && + "Cannot truncate vector element with odd bit-width"); + EltTy = IntegerType::get(VTy->getContext(), EltBits / 2); + } return VectorType::get(EltTy, VTy->getElementCount()); } + // This static method returns a VectorType with a smaller number of elements + // of a larger type than the input element type. For example, a <16 x i8> + // subdivided twice would return <4 x i32> + static VectorType *getSubdividedVectorType(VectorType *VTy, int NumSubdivs) { + for (int i = 0; i < NumSubdivs; ++i) { + VTy = VectorType::getDoubleElementsVectorType(VTy); + VTy = VectorType::getTruncatedElementVectorType(VTy); + } + return VTy; + } + /// This static method returns a VectorType with half as many elements as the /// input type and the same element type. static VectorType *getHalfElementsVectorType(VectorType *VTy) { Index: llvm/trunk/include/llvm/IR/Intrinsics.h =================================================================== --- llvm/trunk/include/llvm/IR/Intrinsics.h +++ llvm/trunk/include/llvm/IR/Intrinsics.h @@ -100,7 +100,8 @@ Integer, Vector, Pointer, Struct, Argument, ExtendArgument, TruncArgument, HalfVecArgument, SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt, - VecElementArgument, ScalableVecArgument + VecElementArgument, ScalableVecArgument, Subdivide2Argument, + Subdivide4Argument } Kind; union { @@ -125,14 +126,16 @@ assert(Kind == Argument || Kind == ExtendArgument || Kind == TruncArgument || Kind == HalfVecArgument || Kind == SameVecWidthArgument || Kind == PtrToArgument || - Kind == PtrToElt || Kind == VecElementArgument); + Kind == PtrToElt || Kind == VecElementArgument || + Kind == Subdivide2Argument || Kind == Subdivide4Argument); return Argument_Info >> 3; } ArgKind getArgumentKind() const { assert(Kind == Argument || Kind == ExtendArgument || Kind == TruncArgument || Kind == HalfVecArgument || Kind == SameVecWidthArgument || Kind == PtrToArgument || - Kind == VecElementArgument); + Kind == VecElementArgument || Kind == Subdivide2Argument || + Kind == Subdivide4Argument); return (ArgKind)(Argument_Info & 7); } Index: llvm/trunk/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/trunk/include/llvm/IR/Intrinsics.td +++ llvm/trunk/include/llvm/IR/Intrinsics.td @@ -187,6 +187,12 @@ // vector type, but change the element count to be half as many class LLVMHalfElementsVectorType : LLVMMatchType; +// Match the type of another intrinsic parameter that is expected to be a +// vector type (i.e. ) but with each element subdivided to +// form a vector with more elements that are smaller than the original. +class LLVMSubdivide2VectorType : LLVMMatchType; +class LLVMSubdivide4VectorType : LLVMMatchType; + def llvm_void_ty : LLVMType; let isAny = 1 in { def llvm_any_ty : LLVMType; Index: llvm/trunk/lib/IR/Function.cpp =================================================================== --- llvm/trunk/lib/IR/Function.cpp +++ llvm/trunk/lib/IR/Function.cpp @@ -703,7 +703,9 @@ IIT_STRUCT8 = 40, IIT_F128 = 41, IIT_VEC_ELEMENT = 42, - IIT_SCALABLE_VEC = 43 + IIT_SCALABLE_VEC = 43, + IIT_SUBDIVIDE2_ARG = 44, + IIT_SUBDIVIDE4_ARG = 45 }; static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, @@ -868,6 +870,18 @@ DecodeIITType(NextElt, Infos, OutputTable); return; } + case IIT_SUBDIVIDE2_ARG: { + unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Subdivide2Argument, + ArgInfo)); + return; + } + case IIT_SUBDIVIDE4_ARG: { + unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Subdivide4Argument, + ArgInfo)); + return; + } case IIT_VEC_ELEMENT: { unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecElementArgument, @@ -970,6 +984,14 @@ assert(ITy->getBitWidth() % 2 == 0); return IntegerType::get(Context, ITy->getBitWidth() / 2); } + case IITDescriptor::Subdivide2Argument: + case IITDescriptor::Subdivide4Argument: { + Type *Ty = Tys[D.getArgumentNumber()]; + VectorType *VTy = dyn_cast(Ty); + assert(VTy && "Expected an argument of Vector Type"); + int SubDivs = D.Kind == IITDescriptor::Subdivide2Argument ? 1 : 2; + return VectorType::getSubdividedVectorType(VTy, SubDivs); + } case IITDescriptor::HalfVecArgument: return VectorType::getHalfElementsVectorType(cast( Tys[D.getArgumentNumber()])); @@ -1269,6 +1291,20 @@ auto *ReferenceType = dyn_cast(ArgTys[D.getArgumentNumber()]); return !ReferenceType || Ty != ReferenceType->getElementType(); } + case IITDescriptor::Subdivide2Argument: + case IITDescriptor::Subdivide4Argument: { + // If this is a forward reference, defer the check for later. + if (D.getArgumentNumber() >= ArgTys.size()) + return IsDeferredCheck || DeferCheck(Ty); + + Type *NewTy = ArgTys[D.getArgumentNumber()]; + if (auto *VTy = dyn_cast(NewTy)) { + int SubDivs = D.Kind == IITDescriptor::Subdivide2Argument ? 1 : 2; + NewTy = VectorType::getSubdividedVectorType(VTy, SubDivs); + return Ty != NewTy; + } + return true; + } case IITDescriptor::ScalableVecArgument: { VectorType *VTy = dyn_cast(Ty); if (!VTy || !VTy->isScalable()) Index: llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp =================================================================== --- llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp +++ llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp @@ -221,7 +221,9 @@ IIT_STRUCT8 = 40, IIT_F128 = 41, IIT_VEC_ELEMENT = 42, - IIT_SCALABLE_VEC = 43 + IIT_SCALABLE_VEC = 43, + IIT_SUBDIVIDE2_ARG = 44, + IIT_SUBDIVIDE4_ARG = 45 }; static void EncodeFixedValueType(MVT::SimpleValueType VT, @@ -293,6 +295,10 @@ Sig.push_back(IIT_PTR_TO_ELT); else if (R->isSubClassOf("LLVMVectorElementType")) Sig.push_back(IIT_VEC_ELEMENT); + else if (R->isSubClassOf("LLVMSubdivide2VectorType")) + Sig.push_back(IIT_SUBDIVIDE2_ARG); + else if (R->isSubClassOf("LLVMSubdivide4VectorType")) + Sig.push_back(IIT_SUBDIVIDE4_ARG); else Sig.push_back(IIT_ARG); return Sig.push_back((Number << 3) | 7 /*IITDescriptor::AK_MatchType*/);