diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -339,22 +339,8 @@
 ///
 /// This is the reverse process of "canWidenShuffleElements", but can always
 /// succeed.
-template <typename T>
-void scaleShuffleMask(size_t Scale, ArrayRef<T> Mask,
-                      SmallVectorImpl<T> &ScaledMask) {
-  assert(Scale > 0 && "Unexpected scaling factor");
-
-  // Fast-path: if no scaling, then it is just a copy.
-  if (Scale == 1) {
-    ScaledMask.assign(Mask.begin(), Mask.end());
-    return;
-  }
-
-  ScaledMask.clear();
-  for (int MaskElt : Mask)
-    for (int ScaleElt = 0; ScaleElt != (int)Scale; ++ScaleElt)
-      ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + ScaleElt);
-}
+void scaleShuffleMask(size_t Scale, ArrayRef<int> Mask,
+                      SmallVectorImpl<int> &ScaledMask);
 
 /// Compute a map of integer instructions to their minimum legal type
 /// size.
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -397,6 +397,22 @@
   return false;
 }
 
+void llvm::scaleShuffleMask(size_t Scale, ArrayRef<int> Mask,
+                            SmallVectorImpl<int> &ScaledMask) {
+  assert(Scale > 0 && "Unexpected scaling factor");
+
+  // Fast-path: if no scaling, then it is just a copy.
+  if (Scale == 1) {
+    ScaledMask.assign(Mask.begin(), Mask.end());
+    return;
+  }
+
+  ScaledMask.clear();
+  for (int MaskElt : Mask)
+    for (int ScaleElt = 0; ScaleElt != (int)Scale; ++ScaleElt)
+      ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + ScaleElt);
+}
+
 MapVector<Instruction *, uint64_t>
 llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                                const TargetTransformInfo *TTI) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19815,8 +19815,8 @@
       ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
       SmallVector<int, 8> InnerMask;
      SmallVector<int, 8> OuterMask;
-      scaleShuffleMask<int>(InnerScale, InnerSVN->getMask(), InnerMask);
-      scaleShuffleMask<int>(OuterScale, SVN->getMask(), OuterMask);
+      scaleShuffleMask(InnerScale, InnerSVN->getMask(), InnerMask);
+      scaleShuffleMask(OuterScale, SVN->getMask(), OuterMask);
 
       // Merge the shuffle masks.
       SmallVector<int, 8> NewMask;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1564,35 +1564,14 @@
   };
 
   /// Generate unpacklo/unpackhi shuffle mask.
-  template <typename T = int>
-  void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
-                               bool Unary) {
-    assert(Mask.empty() && "Expected an empty shuffle mask vector");
-    int NumElts = VT.getVectorNumElements();
-    int NumEltsInLane = 128 / VT.getScalarSizeInBits();
-    for (int i = 0; i < NumElts; ++i) {
-      unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
-      int Pos = (i % NumEltsInLane) / 2 + LaneStart;
-      Pos += (Unary ? 0 : NumElts * (i % 2));
-      Pos += (Lo ? 0 : NumEltsInLane / 2);
-      Mask.push_back(Pos);
-    }
-  }
+  void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo,
+                               bool Unary);
 
   /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
   /// imposed by AVX and specific to the unary pattern. Example:
   /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
   /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
-  template <typename T = int>
-  void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo) {
-    assert(Mask.empty() && "Expected an empty shuffle mask vector");
-    int NumElts = VT.getVectorNumElements();
-    for (int i = 0; i < NumElts; ++i) {
-      int Pos = i / 2;
-      Pos += (Lo ? 0 : NumElts / 2);
-      Mask.push_back(Pos);
-    }
-  }
+  void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6133,6 +6133,35 @@
   return SDValue();
 }
 
+void llvm::createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
+                                   bool Lo, bool Unary) {
+  assert(Mask.empty() && "Expected an empty shuffle mask vector");
+  int NumElts = VT.getVectorNumElements();
+  int NumEltsInLane = 128 / VT.getScalarSizeInBits();
+  for (int i = 0; i < NumElts; ++i) {
+    unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
+    int Pos = (i % NumEltsInLane) / 2 + LaneStart;
+    Pos += (Unary ? 0 : NumElts * (i % 2));
+    Pos += (Lo ? 0 : NumEltsInLane / 2);
+    Mask.push_back(Pos);
+  }
+}
+
+/// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
+/// imposed by AVX and specific to the unary pattern. Example:
+/// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
+/// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
+void llvm::createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
+                                   bool Lo) {
+  assert(Mask.empty() && "Expected an empty shuffle mask vector");
+  int NumElts = VT.getVectorNumElements();
+  for (int i = 0; i < NumElts; ++i) {
+    int Pos = i / 2;
+    Pos += (Lo ? 0 : NumElts / 2);
+    Mask.push_back(Pos);
+  }
+}
+
 /// Returns a vector_shuffle node for an unpackl operation.
 static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
                           SDValue V1, SDValue V2) {
@@ -7320,8 +7349,8 @@
 
   size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
   SmallVector<int, 64> Mask0, Mask1;
-  scaleShuffleMask<int>(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
-  scaleShuffleMask<int>(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
+  scaleShuffleMask(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
+  scaleShuffleMask(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
   for (size_t i = 0; i != MaskSize; ++i) {
     if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef)
       Mask.push_back(SM_SentinelUndef);
@@ -7379,7 +7408,7 @@
     if ((NumSubElts % SubMask.size()) == 0) {
       int Scale = NumSubElts / SubMask.size();
       SmallVector<int, 64> ScaledSubMask;
-      scaleShuffleMask<int>(Scale, SubMask, ScaledSubMask);
+      scaleShuffleMask(Scale, SubMask, ScaledSubMask);
       SubMask = ScaledSubMask;
     } else {
       int Scale = SubMask.size() / NumSubElts;
@@ -16279,7 +16308,7 @@
   SmallVector<int, 2> RepeatedMask;
   if (is128BitLaneRepeatedShuffleMask(MVT::v4i64, Mask, RepeatedMask)) {
     SmallVector<int, 4> PSHUFDMask;
-    scaleShuffleMask<int>(2, RepeatedMask, PSHUFDMask);
+    scaleShuffleMask(2, RepeatedMask, PSHUFDMask);
     return DAG.getBitcast(
         MVT::v4i64,
         DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32,
@@ -16928,7 +16957,7 @@
   SmallVector<int, 4> Widened256Mask;
   if (canWidenShuffleElements(Widened128Mask, Widened256Mask)) {
     Widened128Mask.clear();
-    llvm::scaleShuffleMask<int>(2, Widened256Mask, Widened128Mask);
+    llvm::scaleShuffleMask(2, Widened256Mask, Widened128Mask);
   }
 
   // Try to lower to vshuf64x2/vshuf32x4.
@@ -17079,7 +17108,7 @@
   SmallVector<int, 2> Repeated128Mask;
   if (is128BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated128Mask)) {
     SmallVector<int, 4> PSHUFDMask;
-    scaleShuffleMask<int>(2, Repeated128Mask, PSHUFDMask);
+    scaleShuffleMask(2, Repeated128Mask, PSHUFDMask);
     return DAG.getBitcast(
         MVT::v8i64,
         DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32,
@@ -20166,7 +20195,7 @@
   // Scale shuffle mask to avoid bitcasts and help ComputeNumSignBits.
   SmallVector<int, 64> Mask;
   int Scale = 64 / OutVT.getScalarSizeInBits();
-  scaleShuffleMask<int>(Scale, ArrayRef<int>({ 0, 2, 1, 3 }), Mask);
+  scaleShuffleMask(Scale, { 0, 2, 1, 3 }, Mask);
   Res = DAG.getVectorShuffle(OutVT, DL, Res, Res, Mask);
 
   if (DstVT.is256BitVector())
@@ -33612,7 +33641,7 @@
     // Narrow the repeated mask to create 32-bit element permutes.
     SmallVector<int, 4> WordMask = RepeatedMask;
     if (MaskScalarSizeInBits == 64)
-      scaleShuffleMask<int>(2, RepeatedMask, WordMask);
+      scaleShuffleMask(2, RepeatedMask, WordMask);
 
     Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI);
     ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32);
@@ -34065,7 +34094,7 @@
   if (BaseMaskEltSizeInBits > 64) {
     assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size");
     int MaskScale = BaseMaskEltSizeInBits / 64;
-    scaleShuffleMask<int>(MaskScale, BaseMask, Mask);
+    scaleShuffleMask(MaskScale, BaseMask, Mask);
   } else {
     Mask = SmallVector<int, 64>(BaseMask.begin(), BaseMask.end());
   }
@@ -38189,7 +38218,7 @@
   if ((NumSrcElts % Mask.size()) == 0) {
     SmallVector<int, 64> ScaledMask;
     int Scale = NumSrcElts / Mask.size();
-    scaleShuffleMask<int>(Scale, Mask, ScaledMask);
+    scaleShuffleMask(Scale, Mask, ScaledMask);
     Mask = std::move(ScaledMask);
   } else if ((Mask.size() % NumSrcElts) == 0) {
     // Simplify Mask based on demanded element.
diff --git a/llvm/lib/Target/X86/X86InterleavedAccess.cpp b/llvm/lib/Target/X86/X86InterleavedAccess.cpp
--- a/llvm/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/llvm/lib/Target/X86/X86InterleavedAccess.cpp
@@ -325,19 +325,19 @@
   MVT VT = MVT::v8i16;
   TransposedMatrix.resize(2);
 
-  SmallVector<uint32_t, 16> MaskLow;
-  SmallVector<uint32_t, 16> MaskLowTemp1, MaskLowWord;
-  SmallVector<uint32_t, 16> MaskHighTemp1, MaskHighWord;
+  SmallVector<int, 16> MaskLow;
+  SmallVector<int, 16> MaskLowTemp1, MaskLowWord;
+  SmallVector<int, 16> MaskHighTemp1, MaskHighWord;
 
   for (unsigned i = 0; i < 8; ++i) {
     MaskLow.push_back(i);
     MaskLow.push_back(i + 8);
   }
 
-  createUnpackShuffleMask<uint32_t>(VT, MaskLowTemp1, true, false);
-  createUnpackShuffleMask<uint32_t>(VT, MaskHighTemp1, false, false);
-  scaleShuffleMask<uint32_t>(2, MaskHighTemp1, MaskHighWord);
-  scaleShuffleMask<uint32_t>(2, MaskLowTemp1, MaskLowWord);
+  createUnpackShuffleMask(VT, MaskLowTemp1, true, false);
+  createUnpackShuffleMask(VT, MaskHighTemp1, false, false);
+  scaleShuffleMask(2, MaskHighTemp1, MaskHighWord);
+  scaleShuffleMask(2, MaskLowTemp1, MaskLowWord);
 
   // IntrVec1Low  = c0 m0 c1 m1 c2 m2 c3 m3 c4 m4 c5 m5 c6 m6 c7 m7
   // IntrVec2Low  = y0 k0 y1 k1 y2 k2 y3 k3 y4 k4 y5 k5 y6 k6 y7 k7
   Value *IntrVec1Low =
@@ -367,25 +367,25 @@
   MVT HalfVT = scaleVectorType(VT);
   TransposedMatrix.resize(4);
 
-  SmallVector<uint32_t, 32> MaskHigh;
-  SmallVector<uint32_t, 32> MaskLow;
-  SmallVector<uint32_t, 32> LowHighMask[2];
-  SmallVector<uint32_t, 32> MaskHighTemp;
-  SmallVector<uint32_t, 32> MaskLowTemp;
+  SmallVector<int, 32> MaskHigh;
+  SmallVector<int, 32> MaskLow;
+  SmallVector<int, 32> LowHighMask[2];
+  SmallVector<int, 32> MaskHighTemp;
+  SmallVector<int, 32> MaskLowTemp;
 
   // MaskHighTemp and MaskLowTemp built in the vpunpckhbw and vpunpcklbw X86
   // shuffle pattern.
-  createUnpackShuffleMask<uint32_t>(VT, MaskLow, true, false);
-  createUnpackShuffleMask<uint32_t>(VT, MaskHigh, false, false);
+  createUnpackShuffleMask(VT, MaskLow, true, false);
+  createUnpackShuffleMask(VT, MaskHigh, false, false);
 
   // MaskHighTemp1 and MaskLowTemp1 built in the vpunpckhdw and vpunpckldw X86
   // shuffle pattern.
-  createUnpackShuffleMask<uint32_t>(HalfVT, MaskLowTemp, true, false);
-  createUnpackShuffleMask<uint32_t>(HalfVT, MaskHighTemp, false, false);
-  scaleShuffleMask<uint32_t>(2, MaskLowTemp, LowHighMask[0]);
-  scaleShuffleMask<uint32_t>(2, MaskHighTemp, LowHighMask[1]);
+  createUnpackShuffleMask(HalfVT, MaskLowTemp, true, false);
+  createUnpackShuffleMask(HalfVT, MaskHighTemp, false, false);
+  scaleShuffleMask(2, MaskLowTemp, LowHighMask[0]);
+  scaleShuffleMask(2, MaskHighTemp, LowHighMask[1]);
 
   // IntrVec1Low  = c0 m0 c1 m1 ... c7 m7  | c16 m16 c17 m17 ... c23 m23
   // IntrVec1High = c8 m8 c9 m9 ... c15 m15 | c24 m24 c25 m25 ... c31 m31
diff --git a/llvm/unittests/Analysis/VectorUtilsTest.cpp b/llvm/unittests/Analysis/VectorUtilsTest.cpp
--- a/llvm/unittests/Analysis/VectorUtilsTest.cpp
+++ b/llvm/unittests/Analysis/VectorUtilsTest.cpp
@@ -100,10 +100,10 @@
 TEST_F(BasicTest, scaleShuffleMask) {
   SmallVector<int, 16> ScaledMask;
-  scaleShuffleMask<int>(1, {3,2,0,-2}, ScaledMask);
-  EXPECT_EQ(makeArrayRef<int>(ScaledMask), makeArrayRef<int>({3,2,0,-2}));
-  scaleShuffleMask<int>(4, {3,2,0,-1}, ScaledMask);
-  EXPECT_EQ(makeArrayRef<int>(ScaledMask), makeArrayRef<int>({12,13,14,15,8,9,10,11,0,1,2,3,-1,-1,-1,-1}));
+  scaleShuffleMask(1, {3,2,0,-2}, ScaledMask);
+  EXPECT_EQ(makeArrayRef(ScaledMask), makeArrayRef<int>({3,2,0,-2}));
+  scaleShuffleMask(4, {3,2,0,-1}, ScaledMask);
+  EXPECT_EQ(makeArrayRef(ScaledMask), makeArrayRef<int>({12,13,14,15,8,9,10,11,0,1,2,3,-1,-1,-1,-1}));
 }
 
 TEST_F(BasicTest, getSplatIndex) {