Index: include/llvm/Analysis/VectorUtils.h =================================================================== --- include/llvm/Analysis/VectorUtils.h +++ include/llvm/Analysis/VectorUtils.h @@ -16,6 +16,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/IRBuilder.h" namespace llvm { @@ -123,6 +124,58 @@ /// This function always sets a (possibly null) value for each K in Kinds. Instruction *propagateMetadata(Instruction *I, ArrayRef VL); +/// \brief Create an interleave shuffle mask. +/// +/// This function creates a shuffle mask for interleaving \p NumVecs vectors of +/// vectorization factor \p VF into a single wide vector. The mask is of the +/// form: +/// +/// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...> +/// +/// For example, the mask for VF = 4 and NumVecs = 2 is: +/// +/// <0, 4, 1, 5, 2, 6, 3, 7>. +Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF, + unsigned NumVecs); + +/// \brief Create a stride shuffle mask. +/// +/// This function creates a shuffle mask whose elements begin at \p Start and +/// are incremented by \p Stride. The mask can be used to deinterleave an +/// interleaved vector into separate vectors of vectorization factor \p VF. The +/// mask is of the form: +/// +/// <Start, Start + Stride, ..., Start + Stride * (VF - 1)> +/// +/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is: +/// +/// <0, 2, 4, 6> +Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start, + unsigned Stride, unsigned VF); + +/// \brief Create a sequential shuffle mask. +/// +/// This function creates a shuffle mask whose elements are sequential and begin +/// at \p Start. The mask contains \p NumInts integers and is padded with \p +/// NumUndefs undef values. 
The mask is of the form: +/// +/// <Start, Start + 1, ..., Start + NumInts - 1, undef, ..., undef> +/// +/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is: +/// +/// <0, 1, 2, 3, undef, undef, undef, undef> +Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start, + unsigned NumInts, unsigned NumUndefs); + +/// \brief Concatenate a list of vectors. +/// +/// This function generates code that concatenates the vectors in \p Vecs into +/// a single large vector. The number of vectors should be greater than one, and +/// their element types should be the same. The number of elements in the +/// vectors should also be the same; however, if the last vector has fewer +/// elements, it will be padded with undefs. +Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef Vecs); + } // llvm namespace #endif Index: lib/Analysis/VectorUtils.cpp =================================================================== --- lib/Analysis/VectorUtils.cpp +++ lib/Analysis/VectorUtils.cpp @@ -488,3 +488,88 @@ return Inst; } + +Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF, + unsigned NumVecs) { + SmallVector Mask; + for (unsigned i = 0; i < VF; i++) + for (unsigned j = 0; j < NumVecs; j++) + Mask.push_back(Builder.getInt32(j * VF + i)); + + return ConstantVector::get(Mask); +} + +Constant *llvm::createStrideMask(IRBuilder<> &Builder, unsigned Start, + unsigned Stride, unsigned VF) { + SmallVector Mask; + for (unsigned i = 0; i < VF; i++) + Mask.push_back(Builder.getInt32(Start + i * Stride)); + + return ConstantVector::get(Mask); +} + +Constant *llvm::createSequentialMask(IRBuilder<> &Builder, unsigned Start, + unsigned NumInts, unsigned NumUndefs) { + SmallVector Mask; + for (unsigned i = 0; i < NumInts; i++) + Mask.push_back(Builder.getInt32(Start + i)); + + Constant *Undef = UndefValue::get(Builder.getInt32Ty()); + for (unsigned i = 0; i < NumUndefs; i++) + Mask.push_back(Undef); + + return ConstantVector::get(Mask); +} + +/// A helper function for concatenating vectors. 
This function concatenates two +/// vectors having the same element type. If the second vector has fewer +/// elements than the first, it is padded with undefs. +static Value *concatenateTwoVectors(IRBuilder<> &Builder, Value *V1, + Value *V2) { + VectorType *VecTy1 = dyn_cast(V1->getType()); + VectorType *VecTy2 = dyn_cast(V2->getType()); + assert(VecTy1 && VecTy2 && + VecTy1->getScalarType() == VecTy2->getScalarType() && + "Expect two vectors with the same element type"); + + unsigned NumElts1 = VecTy1->getNumElements(); + unsigned NumElts2 = VecTy2->getNumElements(); + assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements"); + + if (NumElts1 > NumElts2) { + // Pad V2 with undefs so that it has NumElts1 elements, like V1. + Constant *ExtMask = + createSequentialMask(Builder, 0, NumElts2, NumElts1 - NumElts2); + V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask); + } + + Constant *Mask = createSequentialMask(Builder, 0, NumElts1 + NumElts2, 0); + return Builder.CreateShuffleVector(V1, V2, Mask); +} + +Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef Vecs) { + unsigned NumVecs = Vecs.size(); + assert(NumVecs > 1 && "Should be at least two vectors"); + + SmallVector ResList; + ResList.append(Vecs.begin(), Vecs.end()); + do { + SmallVector TmpList; + for (unsigned i = 0; i < NumVecs - 1; i += 2) { + Value *V0 = ResList[i], *V1 = ResList[i + 1]; + assert((V0->getType() == V1->getType() || i == NumVecs - 2) && + "Only the last vector may have a different type"); + + TmpList.push_back(concatenateTwoVectors(Builder, V0, V1)); + } + + // An odd count leaves the last vector unpaired; carry it to the next round. 
+ if (NumVecs % 2 != 0) + TmpList.push_back(ResList[NumVecs - 1]); + + ResList = TmpList; + NumVecs = ResList.size(); + } while (NumVecs > 1); + + return ResList[0]; +} Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -7315,18 +7316,6 @@ return true; } -/// \brief Get a mask consisting of sequential integers starting from \p Start. -/// -/// I.e. -static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start, - unsigned NumElts) { - SmallVector Mask; - for (unsigned i = 0; i < NumElts; i++) - Mask.push_back(Builder.getInt32(Start + i)); - - return ConstantVector::get(Mask); -} - /// \brief Lower an interleaved store into a stN intrinsic. /// /// E.g. 
Lower an interleaved store (Factor = 3): @@ -7408,7 +7397,7 @@ for (unsigned i = 0; i < Factor; i++) { if (Mask[i] >= 0) { Ops.push_back(Builder.CreateShuffleVector( - Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen))); + Op0, Op1, createSequentialMask(Builder, Mask[i], LaneLen, 0))); } else { unsigned StartMask = 0; for (unsigned j = 1; j < LaneLen; j++) { @@ -7423,7 +7412,7 @@ // In the case of all undefs we're defaulting to using elems from 0 // Note: StartMask cannot be negative, it's checked in isReInterleaveMask Ops.push_back(Builder.CreateShuffleVector( - Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen))); + Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0))); } } Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -38,6 +38,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/IntrinsicLowering.h" @@ -13342,18 +13343,6 @@ return true; } -/// \brief Get a mask consisting of sequential integers starting from \p Start. -/// -/// I.e. -static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start, - unsigned NumElts) { - SmallVector Mask; - for (unsigned i = 0; i < NumElts; i++) - Mask.push_back(Builder.getInt32(Start + i)); - - return ConstantVector::get(Mask); -} - /// \brief Lower an interleaved store into a vstN intrinsic. /// /// E.g. 
Lower an interleaved store (Factor = 3): @@ -13439,7 +13428,7 @@ for (unsigned i = 0; i < Factor; i++) { if (Mask[i] >= 0) { Ops.push_back(Builder.CreateShuffleVector( - Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen))); + Op0, Op1, createSequentialMask(Builder, Mask[i], LaneLen, 0))); } else { unsigned StartMask = 0; for (unsigned j = 1; j < LaneLen; j++) { @@ -13454,7 +13443,7 @@ // In the case of all undefs we're defaulting to using elems from 0 // Note: StartMask cannot be negative, it's checked in isReInterleaveMask Ops.push_back(Builder.CreateShuffleVector( - Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen))); + Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0))); } } Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2550,102 +2550,6 @@ "reverse"); } -// Get a mask to interleave \p NumVec vectors into a wide vector. -// I.e. <0, VF, VF*2, ..., VF*(NumVec-1), 1, VF+1, VF*2+1, ...> -// E.g. For 2 interleaved vectors, if VF is 4, the mask is: -// <0, 4, 1, 5, 2, 6, 3, 7> -static Constant *getInterleavedMask(IRBuilder<> &Builder, unsigned VF, - unsigned NumVec) { - SmallVector Mask; - for (unsigned i = 0; i < VF; i++) - for (unsigned j = 0; j < NumVec; j++) - Mask.push_back(Builder.getInt32(j * VF + i)); - - return ConstantVector::get(Mask); -} - -// Get the strided mask starting from index \p Start. -// I.e. -static Constant *getStridedMask(IRBuilder<> &Builder, unsigned Start, - unsigned Stride, unsigned VF) { - SmallVector Mask; - for (unsigned i = 0; i < VF; i++) - Mask.push_back(Builder.getInt32(Start + i * Stride)); - - return ConstantVector::get(Mask); -} - -// Get a mask of two parts: The first part consists of sequential integers -// starting from 0, The second part consists of UNDEFs. -// I.e. 
<0, 1, 2, ..., NumInt - 1, undef, ..., undef> -static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned NumInt, - unsigned NumUndef) { - SmallVector Mask; - for (unsigned i = 0; i < NumInt; i++) - Mask.push_back(Builder.getInt32(i)); - - Constant *Undef = UndefValue::get(Builder.getInt32Ty()); - for (unsigned i = 0; i < NumUndef; i++) - Mask.push_back(Undef); - - return ConstantVector::get(Mask); -} - -// Concatenate two vectors with the same element type. The 2nd vector should -// not have more elements than the 1st vector. If the 2nd vector has less -// elements, extend it with UNDEFs. -static Value *ConcatenateTwoVectors(IRBuilder<> &Builder, Value *V1, - Value *V2) { - VectorType *VecTy1 = dyn_cast(V1->getType()); - VectorType *VecTy2 = dyn_cast(V2->getType()); - assert(VecTy1 && VecTy2 && - VecTy1->getScalarType() == VecTy2->getScalarType() && - "Expect two vectors with the same element type"); - - unsigned NumElts1 = VecTy1->getNumElements(); - unsigned NumElts2 = VecTy2->getNumElements(); - assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements"); - - if (NumElts1 > NumElts2) { - // Extend with UNDEFs. - Constant *ExtMask = - getSequentialMask(Builder, NumElts2, NumElts1 - NumElts2); - V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask); - } - - Constant *Mask = getSequentialMask(Builder, NumElts1 + NumElts2, 0); - return Builder.CreateShuffleVector(V1, V2, Mask); -} - -// Concatenate vectors in the given list. All vectors have the same type. 
-static Value *ConcatenateVectors(IRBuilder<> &Builder, - ArrayRef InputList) { - unsigned NumVec = InputList.size(); - assert(NumVec > 1 && "Should be at least two vectors"); - - SmallVector ResList; - ResList.append(InputList.begin(), InputList.end()); - do { - SmallVector TmpList; - for (unsigned i = 0; i < NumVec - 1; i += 2) { - Value *V0 = ResList[i], *V1 = ResList[i + 1]; - assert((V0->getType() == V1->getType() || i == NumVec - 2) && - "Only the last vector may have a different type"); - - TmpList.push_back(ConcatenateTwoVectors(Builder, V0, V1)); - } - - // Push the last vector if the total number of vectors is odd. - if (NumVec % 2 != 0) - TmpList.push_back(ResList[NumVec - 1]); - - ResList = TmpList; - NumVec = ResList.size(); - } while (NumVec > 1); - - return ResList[0]; -} - // Try to vectorize the interleave group that \p Instr belongs to. // // E.g. Translate following interleaved load group (factor = 3): @@ -2751,7 +2655,7 @@ continue; VectorParts Entry(UF); - Constant *StrideMask = getStridedMask(Builder, I, InterleaveFactor, VF); + Constant *StrideMask = createStrideMask(Builder, I, InterleaveFactor, VF); for (unsigned Part = 0; Part < UF; Part++) { Value *StridedVec = Builder.CreateShuffleVector( NewLoads[Part], UndefVec, StrideMask, "strided.vec"); @@ -2795,10 +2699,10 @@ } // Concatenate all vectors into a wide vector. - Value *WideVec = ConcatenateVectors(Builder, StoredVecs); + Value *WideVec = concatenateVectors(Builder, StoredVecs); // Interleave the elements in the wide vector. - Constant *IMask = getInterleavedMask(Builder, VF, InterleaveFactor); + Constant *IMask = createInterleaveMask(Builder, VF, InterleaveFactor); Value *IVec = Builder.CreateShuffleVector(WideVec, UndefVec, IMask, "interleaved.vec");