diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -935,31 +935,28 @@ ArrayRef Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const { - int Limit = Mask.size() * 2; - if (Mask.empty() || - // Extra check required by isSingleSourceMaskImpl function (called by - // ShuffleVectorInst::isSingleSourceMask). - any_of(Mask, [Limit](int I) { return I >= Limit; })) + if (Mask.empty()) return Kind; + int NumSrcElts = Ty->getElementCount().getKnownMinValue(); switch (Kind) { case TTI::SK_PermuteSingleSrc: - if (ShuffleVectorInst::isReverseMask(Mask)) + if (ShuffleVectorInst::isReverseMask(Mask, NumSrcElts)) return TTI::SK_Reverse; - if (ShuffleVectorInst::isZeroEltSplatMask(Mask)) + if (ShuffleVectorInst::isZeroEltSplatMask(Mask, NumSrcElts)) return TTI::SK_Broadcast; break; case TTI::SK_PermuteTwoSrc: { int NumSubElts; if (Mask.size() > 2 && ShuffleVectorInst::isInsertSubvectorMask( - Mask, Mask.size(), NumSubElts, Index)) { + Mask, NumSrcElts, NumSubElts, Index)) { SubTy = FixedVectorType::get(Ty->getElementType(), NumSubElts); return TTI::SK_InsertSubvector; } - if (ShuffleVectorInst::isSelectMask(Mask)) + if (ShuffleVectorInst::isSelectMask(Mask, NumSrcElts)) return TTI::SK_Select; - if (ShuffleVectorInst::isTransposeMask(Mask)) + if (ShuffleVectorInst::isTransposeMask(Mask, NumSrcElts)) return TTI::SK_Transpose; - if (ShuffleVectorInst::isSpliceMask(Mask, Index)) + if (ShuffleVectorInst::isSpliceMask(Mask, NumSrcElts, Index)) return TTI::SK_Splice; break; } diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -2105,12 +2105,12 @@ /// vector. /// Example: <7,5,undef,7> /// This assumes that vector operands are the same length as the mask. - static bool isSingleSourceMask(ArrayRef Mask); - static bool isSingleSourceMask(const Constant *Mask) { + static bool isSingleSourceMask(ArrayRef Mask, int NumSrcElts); + static bool isSingleSourceMask(const Constant *Mask, int NumSrcElts) { assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant."); SmallVector MaskAsInts; getShuffleMask(Mask, MaskAsInts); - return isSingleSourceMask(MaskAsInts); + return isSingleSourceMask(MaskAsInts, NumSrcElts); } /// Return true if this shuffle chooses elements from exactly one source @@ -2118,7 +2118,8 @@ /// Example: shufflevector <4 x n> A, <4 x n> B, <3,0,undef,3> /// TODO: Optionally allow length-changing shuffles. bool isSingleSource() const { - return !changesLength() && isSingleSourceMask(ShuffleMask); + return !changesLength() && + isSingleSourceMask(ShuffleMask, ShuffleMask.size()); } /// Return true if this shuffle mask chooses elements from exactly one source @@ -2126,8 +2127,8 @@ /// necessarily a no-op because it may change the number of elements from its /// input vectors or it may provide demanded bits knowledge via undef lanes. /// Example: - static bool isIdentityMask(ArrayRef Mask); - static bool isIdentityMask(const Constant *Mask) { + static bool isIdentityMask(ArrayRef Mask, int NumSrcElts); + static bool isIdentityMask(const Constant *Mask, int NumSrcElts) { assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant."); // Not possible to express a shuffle mask for a scalable vector for this @@ -2137,7 +2138,7 @@ SmallVector MaskAsInts; getShuffleMask(Mask, MaskAsInts); - return isIdentityMask(MaskAsInts); + return isIdentityMask(MaskAsInts, NumSrcElts); } /// Return true if this shuffle chooses elements from exactly one source @@ -2150,7 +2151,7 @@ if (isa(getType())) return false; - return !changesLength() && isIdentityMask(ShuffleMask); + return !changesLength() && isIdentityMask(ShuffleMask, ShuffleMask.size()); } /// Return true if this shuffle lengthens exactly one source vector with @@ -2174,12 +2175,12 @@ /// In that case, the shuffle is better classified as an identity shuffle. /// This assumes that vector operands are the same length as the mask /// (a length-changing shuffle can never be equivalent to a vector select). - static bool isSelectMask(ArrayRef Mask); - static bool isSelectMask(const Constant *Mask) { + static bool isSelectMask(ArrayRef Mask, int NumSrcElts); + static bool isSelectMask(const Constant *Mask, int NumSrcElts) { assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant."); SmallVector MaskAsInts; getShuffleMask(Mask, MaskAsInts); - return isSelectMask(MaskAsInts); + return isSelectMask(MaskAsInts, NumSrcElts); } /// Return true if this shuffle chooses elements from its source vectors @@ -2191,19 +2192,19 @@ /// In that case, the shuffle is better classified as an identity shuffle. /// TODO: Optionally allow length-changing shuffles. bool isSelect() const { - return !changesLength() && isSelectMask(ShuffleMask); + return !changesLength() && isSelectMask(ShuffleMask, ShuffleMask.size()); } /// Return true if this shuffle mask swaps the order of elements from exactly /// one source vector. /// Example: <7,6,undef,4> /// This assumes that vector operands are the same length as the mask. - static bool isReverseMask(ArrayRef Mask); - static bool isReverseMask(const Constant *Mask) { + static bool isReverseMask(ArrayRef Mask, int NumSrcElts); + static bool isReverseMask(const Constant *Mask, int NumSrcElts) { assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant."); SmallVector MaskAsInts; getShuffleMask(Mask, MaskAsInts); - return isReverseMask(MaskAsInts); + return isReverseMask(MaskAsInts, NumSrcElts); } /// Return true if this shuffle swaps the order of elements from exactly @@ -2211,19 +2212,19 @@ /// Example: shufflevector <4 x n> A, <4 x n> B, <3,undef,1,undef> /// TODO: Optionally allow length-changing shuffles. bool isReverse() const { - return !changesLength() && isReverseMask(ShuffleMask); + return !changesLength() && isReverseMask(ShuffleMask, ShuffleMask.size()); } /// Return true if this shuffle mask chooses all elements with the same value /// as the first element of exactly one source vector. /// Example: <4,undef,undef,4> /// This assumes that vector operands are the same length as the mask. - static bool isZeroEltSplatMask(ArrayRef Mask); - static bool isZeroEltSplatMask(const Constant *Mask) { + static bool isZeroEltSplatMask(ArrayRef Mask, int NumSrcElts); + static bool isZeroEltSplatMask(const Constant *Mask, int NumSrcElts) { assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant."); SmallVector MaskAsInts; getShuffleMask(Mask, MaskAsInts); - return isZeroEltSplatMask(MaskAsInts); + return isZeroEltSplatMask(MaskAsInts, NumSrcElts); } /// Return true if all elements of this shuffle are the same value as the @@ -2233,7 +2234,8 @@ /// TODO: Optionally allow length-changing shuffles. /// TODO: Optionally allow splats from other elements. bool isZeroEltSplat() const { - return !changesLength() && isZeroEltSplatMask(ShuffleMask); + return !changesLength() && + isZeroEltSplatMask(ShuffleMask, ShuffleMask.size()); } /// Return true if this shuffle mask is a transpose mask. @@ -2268,12 +2270,12 @@ /// ; Transposed matrix /// t0 = < a, e, c, g > = shufflevector m0, m1 < 0, 4, 2, 6 > /// t1 = < b, f, d, h > = shufflevector m0, m1 < 1, 5, 3, 7 > - static bool isTransposeMask(ArrayRef Mask); - static bool isTransposeMask(const Constant *Mask) { + static bool isTransposeMask(ArrayRef Mask, int NumSrcElts); + static bool isTransposeMask(const Constant *Mask, int NumSrcElts) { assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant."); SmallVector MaskAsInts; getShuffleMask(Mask, MaskAsInts); - return isTransposeMask(MaskAsInts); + return isTransposeMask(MaskAsInts, NumSrcElts); } /// Return true if this shuffle transposes the elements of its inputs without @@ -2282,19 +2284,19 @@ /// exact specification. /// Example: shufflevector <4 x n> A, <4 x n> B, <0,4,2,6> bool isTranspose() const { - return !changesLength() && isTransposeMask(ShuffleMask); + return !changesLength() && isTransposeMask(ShuffleMask, ShuffleMask.size()); } /// Return true if this shuffle mask is a splice mask, concatenating the two /// inputs together and then extracts an original width vector starting from /// the splice index. /// Example: shufflevector <4 x n> A, <4 x n> B, <1,2,3,4> - static bool isSpliceMask(ArrayRef Mask, int &Index); - static bool isSpliceMask(const Constant *Mask, int &Index) { + static bool isSpliceMask(ArrayRef Mask, int NumSrcElts, int &Index); + static bool isSpliceMask(const Constant *Mask, int NumSrcElts, int &Index) { assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant."); SmallVector MaskAsInts; getShuffleMask(Mask, MaskAsInts); - return isSpliceMask(MaskAsInts, Index); + return isSpliceMask(MaskAsInts, NumSrcElts, Index); } /// Return true if this shuffle splices two inputs without changing the length @@ -2302,7 +2304,8 @@ /// then extracts an original width vector starting from the splice index. /// Example: shufflevector <4 x n> A, <4 x n> B, <1,2,3,4> bool isSplice(int &Index) const { - return !changesLength() && isSpliceMask(ShuffleMask, Index); + return !changesLength() && + isSpliceMask(ShuffleMask, ShuffleMask.size(), Index); } /// Return true if this shuffle mask is an extract subvector mask. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -23843,7 +23843,7 @@ // Profitability check: only deal with extractions from the first subvector // unless the mask becomes an identity mask. - if (!ShuffleVectorInst::isIdentityMask(NewMask) || + if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) || any_of(NewMask, [](int M) { return M < 0; })) for (auto &DemandedSubvector : DemandedSubvectors) if (DemandedSubvector.second != 0) diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -2328,10 +2328,10 @@ return UsesLHS || UsesRHS; } -bool ShuffleVectorInst::isSingleSourceMask(ArrayRef Mask) { +bool ShuffleVectorInst::isSingleSourceMask(ArrayRef Mask, int NumSrcElts) { // We don't have vector operand size information, so assume operands are the // same size as the mask. - return isSingleSourceMaskImpl(Mask, Mask.size()); + return isSingleSourceMaskImpl(Mask, NumSrcElts); } static bool isIdentityMaskImpl(ArrayRef Mask, int NumOpElts) { @@ -2346,65 +2346,71 @@ return true; } -bool ShuffleVectorInst::isIdentityMask(ArrayRef Mask) { +bool ShuffleVectorInst::isIdentityMask(ArrayRef Mask, int NumSrcElts) { // We don't have vector operand size information, so assume operands are the // same size as the mask. - return isIdentityMaskImpl(Mask, Mask.size()); + return isIdentityMaskImpl(Mask, NumSrcElts); } -bool ShuffleVectorInst::isReverseMask(ArrayRef Mask) { - if (!isSingleSourceMask(Mask)) +bool ShuffleVectorInst::isReverseMask(ArrayRef Mask, int NumSrcElts) { + if (!isSingleSourceMask(Mask, NumSrcElts)) return false; + if (Mask.size() != static_cast(NumSrcElts)) + return false; // The number of elements in the mask must be at least 2. - int NumElts = Mask.size(); - if (NumElts < 2) + if (NumSrcElts < 2) return false; - for (int i = 0; i < NumElts; ++i) { - if (Mask[i] == -1) + for (int I = 0, E = Mask.size(); I < E; ++I) { + if (Mask[I] == -1) continue; - if (Mask[i] != (NumElts - 1 - i) && Mask[i] != (NumElts + NumElts - 1 - i)) + if (Mask[I] != (NumSrcElts - 1 - I) && + Mask[I] != (NumSrcElts + NumSrcElts - 1 - I)) return false; } return true; } -bool ShuffleVectorInst::isZeroEltSplatMask(ArrayRef Mask) { - if (!isSingleSourceMask(Mask)) +bool ShuffleVectorInst::isZeroEltSplatMask(ArrayRef Mask, int NumSrcElts) { + if (!isSingleSourceMask(Mask, NumSrcElts)) return false; - for (int i = 0, NumElts = Mask.size(); i < NumElts; ++i) { - if (Mask[i] == -1) + for (int I = 0, E = Mask.size(); I < E; ++I) { + if (Mask[I] == -1) continue; - if (Mask[i] != 0 && Mask[i] != NumElts) + if (Mask[I] != 0 && Mask[I] != NumSrcElts) return false; } return true; } -bool ShuffleVectorInst::isSelectMask(ArrayRef Mask) { +bool ShuffleVectorInst::isSelectMask(ArrayRef Mask, int NumSrcElts) { // Select is differentiated from identity. It requires using both sources. - if (isSingleSourceMask(Mask)) + if (isSingleSourceMask(Mask, NumSrcElts)) return false; - for (int i = 0, NumElts = Mask.size(); i < NumElts; ++i) { - if (Mask[i] == -1) + if (Mask.size() != static_cast(NumSrcElts)) + return false; + for (int I = 0, E = Mask.size(); I < E; ++I) { + if (Mask[I] == -1) continue; - if (Mask[i] != i && Mask[i] != (NumElts + i)) + if (Mask[I] != I && Mask[I] != (NumSrcElts + I)) return false; } return true; } -bool ShuffleVectorInst::isTransposeMask(ArrayRef Mask) { +bool ShuffleVectorInst::isTransposeMask(ArrayRef Mask, int NumSrcElts) { // Example masks that will return true: // v1 = // v2 = // trn1 = shufflevector v1, v2 <0, 4, 2, 6> = // trn2 = shufflevector v1, v2 <1, 5, 3, 7> = + if (Mask.size() != static_cast(NumSrcElts)) + return false; // 1. The number of elements in the mask must be a power-of-2 and at least 2. - int NumElts = Mask.size(); - if (NumElts < 2 || !isPowerOf2_32(NumElts)) + int Sz = Mask.size(); + if (Sz < 2 || !isPowerOf2_32(Sz)) return false; // 2. The first element of the mask must be either a 0 or a 1. @@ -2413,23 +2419,26 @@ // 3. The difference between the first 2 elements must be equal to the // number of elements in the mask. - if ((Mask[1] - Mask[0]) != NumElts) + if ((Mask[1] - Mask[0]) != NumSrcElts) return false; // 4. The difference between consecutive even-numbered and odd-numbered // elements must be equal to 2. - for (int i = 2; i < NumElts; ++i) { - int MaskEltVal = Mask[i]; + for (int I = 2; I < Sz; ++I) { + int MaskEltVal = Mask[I]; if (MaskEltVal == -1) return false; - int MaskEltPrevVal = Mask[i - 2]; + int MaskEltPrevVal = Mask[I - 2]; if (MaskEltVal - MaskEltPrevVal != 2) return false; } return true; } -bool ShuffleVectorInst::isSpliceMask(ArrayRef Mask, int &Index) { +bool ShuffleVectorInst::isSpliceMask(ArrayRef Mask, int NumSrcElts, + int &Index) { + if (Mask.size() != static_cast(NumSrcElts)) + return false; // Example: shufflevector <4 x n> A, <4 x n> B, <1,2,3,4> int StartIndex = -1; for (int I = 0, E = Mask.size(); I != E; ++I) { @@ -2440,7 +2449,7 @@ if (StartIndex == -1) { // Don't support a StartIndex that begins in the second input, or if the // first non-undef index would access below the StartIndex. - if (MaskEltVal < I || E <= (MaskEltVal - I)) + if (MaskEltVal < I || NumSrcElts <= (MaskEltVal - I)) return false; StartIndex = MaskEltVal - I; @@ -2735,7 +2744,7 @@ // case. if (isa(getType())) return false; - if (!isSingleSourceMask(ShuffleMask)) + if (!isSingleSourceMask(ShuffleMask, VF)) return false; return isOneUseSingleSourceMask(ShuffleMask, VF); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11747,7 +11747,7 @@ if (((VT.getVectorNumElements() == 8 && VT.getScalarSizeInBits() == 16) || (VT.getVectorNumElements() == 16 && VT.getScalarSizeInBits() == 8)) && - ShuffleVectorInst::isReverseMask(ShuffleMask)) { + ShuffleVectorInst::isReverseMask(ShuffleMask, ShuffleMask.size())) { SDValue Rev = DAG.getNode(AArch64ISD::REV64, dl, VT, V1); return DAG.getNode(AArch64ISD::EXT, dl, VT, Rev, Rev, DAG.getConstant(8, dl, MVT::i32)); @@ -25786,7 +25786,8 @@ unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits(); unsigned MaxSVESize = Subtarget->getMaxSVEVectorSizeInBits(); if (MinSVESize == MaxSVESize && MaxSVESize == VT.getSizeInBits()) { - if (ShuffleVectorInst::isReverseMask(ShuffleMask) && Op2.isUndef()) { + if (ShuffleVectorInst::isReverseMask(ShuffleMask, ShuffleMask.size()) && + Op2.isUndef()) { Op = DAG.getNode(ISD::VECTOR_REVERSE, DL, ContainerVT, Op1); return convertFromScalableVector(DAG, VT, Op); } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -8370,7 +8370,7 @@ unsigned EltSize = VT.getScalarSizeInBits(); if (EltSize >= 32 || ShuffleVectorSDNode::isSplatMask(&M[0], VT) || - ShuffleVectorInst::isIdentityMask(M) || + ShuffleVectorInst::isIdentityMask(M, M.size()) || isVREVMask(M, VT, 64) || isVREVMask(M, VT, 32) || isVREVMask(M, VT, 16)) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4131,7 +4131,8 @@ assert(VT.getVectorElementType() == MVT::i1); - if (!ShuffleVectorInst::isReverseMask(SVN->getMask()) || + if (!ShuffleVectorInst::isReverseMask(SVN->getMask(), + SVN->getMask().size()) || !SVN->getOperand(1).isUndef()) return SDValue(); diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -351,7 +351,10 @@ LT.second.getVectorElementType().getSizeInBits() == Tp->getElementType()->getPrimitiveSizeInBits() && LT.second.getVectorNumElements() < - cast(Tp)->getNumElements()) { + cast(Tp)->getNumElements() && + divideCeil(Mask.size(), + cast(Tp)->getNumElements()) == + static_cast(*LT.first.getValue())) { unsigned NumRegs = *LT.first.getValue(); unsigned VF = cast(Tp)->getNumElements(); unsigned SubVF = PowerOf2Ceil(VF / NumRegs); @@ -529,7 +532,7 @@ InstructionCost Cost = MemCost; for (unsigned Index : Indices) { FixedVectorType *SubVecTy = - FixedVectorType::get(FVTy->getElementType(), VF); + FixedVectorType::get(FVTy->getElementType(), VF * Factor); auto Mask = createStrideMask(Index, Factor, VF); InstructionCost ShuffleCost = getShuffleCost(TTI::ShuffleKind::SK_PermuteSingleSrc, SubVecTy, Mask, diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1635,7 +1635,7 @@ NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {}, [this, SingleOpTy, CostKind, &PrevSrcReg, &PrevRegMask, &Cost](ArrayRef RegMask, unsigned SrcReg, unsigned DestReg) { - if (!ShuffleVectorInst::isIdentityMask(RegMask)) { + if (!ShuffleVectorInst::isIdentityMask(RegMask, RegMask.size())) { // Check if the previous register can be just copied to the next // one. if (PrevRegMask.empty() || PrevSrcReg != SrcReg || diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -2122,8 +2122,8 @@ NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i]; // A select mask with undef elements might look like an identity mask. - assert((ShuffleVectorInst::isSelectMask(NewMask) || - ShuffleVectorInst::isIdentityMask(NewMask)) && + assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) || + ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) && "Unexpected shuffle mask"); return new ShuffleVectorInst(X, Y, NewMask); } diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3765,7 +3765,7 @@ inversePermutation(Order, MaskOrder); } reorderReuses(MaskOrder, Mask); - if (ShuffleVectorInst::isIdentityMask(MaskOrder)) { + if (ShuffleVectorInst::isIdentityMask(MaskOrder, MaskOrder.size())) { Order.clear(); return; } @@ -4274,7 +4274,7 @@ static bool isRepeatedNonIdentityClusteredMask(ArrayRef Mask, unsigned Sz) { ArrayRef FirstCluster = Mask.slice(0, Sz); - if (ShuffleVectorInst::isIdentityMask(FirstCluster)) + if (ShuffleVectorInst::isIdentityMask(FirstCluster, Sz)) return false; for (unsigned I = Sz, E = Mask.size(); I < E; I += Sz) { ArrayRef Cluster = Mask.slice(I, Sz); @@ -6508,8 +6508,7 @@ int Limit = Mask.size(); int VF = VecTy->getNumElements(); return (VF == Limit || !IsStrict) && - all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) && - ShuffleVectorInst::isIdentityMask(Mask); + ShuffleVectorInst::isIdentityMask(Mask, VF); } /// Tries to combine 2 different masks into single one. @@ -6579,7 +6578,8 @@ if (isIdentityMask(Mask, SVTy, /*IsStrict=*/false)) { if (!IdentityOp || !SinglePermute || (isIdentityMask(Mask, SVTy, /*IsStrict=*/true) && - !ShuffleVectorInst::isZeroEltSplatMask(IdentityMask))) { + !ShuffleVectorInst::isZeroEltSplatMask(IdentityMask, + IdentityMask.size()))) { IdentityOp = SV; // Store current mask in the IdentityMask so later we did not lost // this info if IdentityOp is selected as the best candidate for the @@ -6649,7 +6649,7 @@ } if (auto *OpTy = dyn_cast(Op->getType()); !OpTy || !isIdentityMask(Mask, OpTy, SinglePermute) || - ShuffleVectorInst::isZeroEltSplatMask(Mask)) { + ShuffleVectorInst::isZeroEltSplatMask(Mask, Mask.size())) { if (IdentityOp) { V = IdentityOp; assert(Mask.size() == IdentityMask.size() && @@ -6665,7 +6665,7 @@ /*IsStrict=*/true) || (Shuffle && Mask.size() == Shuffle->getShuffleMask().size() && Shuffle->isZeroEltSplat() && - ShuffleVectorInst::isZeroEltSplatMask(Mask))); + ShuffleVectorInst::isZeroEltSplatMask(Mask, Mask.size()))); } V = Op; return false; @@ -6770,11 +6770,9 @@ CombinedMask1[I] = CombinedMask2[I] + (Op1 == Op2 ? 0 : VF); } } - const int Limit = CombinedMask1.size() * 2; - if (Op1 == Op2 && Limit == 2 * VF && - all_of(CombinedMask1, [=](int Idx) { return Idx < Limit; }) && - (ShuffleVectorInst::isIdentityMask(CombinedMask1) || - (ShuffleVectorInst::isZeroEltSplatMask(CombinedMask1) && + if (Op1 == Op2 && + (ShuffleVectorInst::isIdentityMask(CombinedMask1, VF) || + (ShuffleVectorInst::isZeroEltSplatMask(CombinedMask1, VF) && isa(Op1) && cast(Op1)->getShuffleMask() == ArrayRef(CombinedMask1)))) @@ -6944,7 +6942,10 @@ /// extracted values from \p VL. InstructionCost computeExtractCost(ArrayRef VL, ArrayRef Mask, TTI::ShuffleKind ShuffleKind) { - auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size()); + auto *VecTy = cast( + cast(*find_if(VL, [](Value *V) { + return isa(V); + }))->getVectorOperandType()); unsigned NumOfParts = TTI.getNumberOfParts(VecTy); if (ShuffleKind != TargetTransformInfo::SK_PermuteSingleSrc || @@ -7006,11 +7007,9 @@ const TargetTransformInfo &TTI; static bool isEmptyOrIdentity(ArrayRef Mask, unsigned VF) { - int Limit = 2 * VF; return Mask.empty() || (VF == Mask.size() && - all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) && - ShuffleVectorInst::isIdentityMask(Mask)); + ShuffleVectorInst::isIdentityMask(Mask, VF)); } public: @@ -7259,9 +7258,7 @@ ::addMask(CommonMask, ExtMask, /*ExtendingManyInputs=*/true); if (CommonMask.empty()) return Cost; - int Limit = CommonMask.size() * 2; - if (all_of(CommonMask, [=](int Idx) { return Idx < Limit; }) && - ShuffleVectorInst::isIdentityMask(CommonMask)) + if (ShuffleVectorInst::isIdentityMask(CommonMask, CommonMask.size())) return Cost; return Cost + createShuffle(InVectors.front(), @@ -7440,7 +7437,7 @@ } if (NeedToShuffleReuses) ::addMask(Mask, E->ReuseShuffleIndices); - if (!Mask.empty() && !ShuffleVectorInst::isIdentityMask(Mask)) + if (!Mask.empty() && !ShuffleVectorInst::isIdentityMask(Mask, Mask.size())) CommonCost = TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FinalVecTy, Mask); assert((E->State == TreeEntry::Vectorize || @@ -7779,7 +7776,7 @@ } else { SmallVector Mask; inversePermutation(OpTE->ReorderIndices, Mask); - if (ShuffleVectorInst::isReverseMask(Mask)) + if (ShuffleVectorInst::isReverseMask(Mask, Mask.size())) CCH = TTI::CastContextHint::Reversed; } } @@ -8632,9 +8629,7 @@ unsigned VecVF = TE->getVectorFactor(); if (VF != VecVF && (any_of(Mask, [VF](int Idx) { return Idx >= static_cast(VF); }) || - (all_of(Mask, - [VF](int Idx) { return Idx < 2 * static_cast(VF); }) && - !ShuffleVectorInst::isIdentityMask(Mask)))) { + !ShuffleVectorInst::isIdentityMask(Mask, VF))) { SmallVector OrigMask(VecVF, PoisonMaskElem); std::copy(Mask.begin(), std::next(Mask.begin(), std::min(VF, VecVF)), OrigMask.begin()); @@ -8663,9 +8658,7 @@ assert((TEs.size() == 1 || TEs.size() == 2) && "Expected exactly 1 or 2 tree entries."); if (TEs.size() == 1) { - int Limit = 2 * Mask.size(); - if (!all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) || - !ShuffleVectorInst::isIdentityMask(Mask)) { + if (!ShuffleVectorInst::isIdentityMask(Mask, Mask.size())) { InstructionCost C = TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FTy, Mask); LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C @@ -9434,7 +9427,7 @@ return V1; unsigned VF = Mask.size(); unsigned LocalVF = cast(V1->getType())->getNumElements(); - if (VF == LocalVF && ShuffleVectorInst::isIdentityMask(Mask)) + if (VF == LocalVF && ShuffleVectorInst::isIdentityMask(Mask, VF)) return V1; Value *Vec = Builder.CreateShuffleVector(V1, Mask); if (auto *I = dyn_cast(Vec)) { @@ -9826,9 +9819,7 @@ return false; unsigned I = *find_if_not(Mask, [](int Idx) { return Idx == PoisonMaskElem; }); - int Sz = Mask.size(); - if (all_of(Mask, [Sz](int Idx) { return Idx < 2 * Sz; }) && - ShuffleVectorInst::isIdentityMask(Mask)) + if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size())) std::iota(Mask.begin(), Mask.end(), 0); else std::fill(Mask.begin(), Mask.end(), I); @@ -10072,11 +10063,11 @@ (ExtractShuffle.value_or(TTI::SK_PermuteTwoSrc) == TTI::SK_PermuteSingleSrc && none_of(ExtractMask, [&](int I) { return I >= EMSz; }) && - ShuffleVectorInst::isIdentityMask(ExtractMask)) || + ShuffleVectorInst::isIdentityMask(ExtractMask, EMSz)) || (GatherShuffle.value_or(TTI::SK_PermuteTwoSrc) == TTI::SK_PermuteSingleSrc && none_of(Mask, [&](int I) { return I >= MSz; }) && - ShuffleVectorInst::isIdentityMask(Mask)); + ShuffleVectorInst::isIdentityMask(Mask, MSz)); bool EnoughConstsForShuffle = IsSingleShuffle && (none_of(GatheredScalars, @@ -10364,7 +10355,7 @@ if ((!IsIdentity || Offset != 0 || !IsFirstUndef.all()) && NumElts != NumScalars) { if (IsFirstUndef.all()) { - if (!ShuffleVectorInst::isIdentityMask(InsertMask)) { + if (!ShuffleVectorInst::isIdentityMask(InsertMask, NumElts)) { SmallBitVector IsFirstPoison = isUndefVector(FirstInsert->getOperand(0), UseMask); if (!IsFirstPoison.all()) { @@ -11171,7 +11162,7 @@ // non-resizing mask. if (Mask.size() != cast(Vals.front()->getType()) ->getNumElements() || - !ShuffleVectorInst::isIdentityMask(Mask)) + !ShuffleVectorInst::isIdentityMask(Mask, Mask.size())) return CreateShuffle(Vals.front(), nullptr, Mask); return Vals.front(); } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll @@ -464,45 +464,21 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) { ; CHECK-LABEL: @combine_load_factor2_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP3]], align 4 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], [[STRIDED_VEC1]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP6]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 -; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] +; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]] ; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 4 ; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 ; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] ; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 4 ; CHECK-NEXT: [[RES:%.*]] = add i64 [[X0]], [[X1]] -; CHECK-NEXT: [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I]] +; CHECK-NEXT: [[DST:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[I]] ; CHECK-NEXT: store i64 [[RES]], ptr [[DST]], align 4 ; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 ; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll @@ -291,14 +291,19 @@ ; ; GFX8-LABEL: @uadd_sat_v4i16( ; GFX8-NEXT: bb: -; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> -; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 2 +; GFX8-NEXT: [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3 +; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 2 +; GFX8-NEXT: [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3 +; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) -; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> -; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> -; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]]) -; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> -; GFX8-NEXT: ret <4 x i16> [[INS_31]] +; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]]) +; GFX8-NEXT: [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]]) +; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> +; GFX8-NEXT: [[INS_2:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[ADD_2]], i64 2 +; GFX8-NEXT: [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3 +; GFX8-NEXT: ret <4 x i16> [[INS_3]] ; bb: %arg0.0 = extractelement <4 x i16> %arg0, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll @@ -291,14 +291,19 @@ ; ; GFX8-LABEL: @uadd_sat_v4i16( ; GFX8-NEXT: bb: -; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> -; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 2 +; GFX8-NEXT: [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3 +; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 2 +; GFX8-NEXT: [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3 +; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) -; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> -; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> -; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]]) -; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> -; GFX8-NEXT: ret <4 x i16> [[INS_31]] +; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]]) +; GFX8-NEXT: [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]]) +; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> +; GFX8-NEXT: [[INS_2:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[ADD_2]], i64 2 +; GFX8-NEXT: [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3 +; GFX8-NEXT: ret <4 x i16> [[INS_3]] ; bb: %arg0.0 = extractelement <4 x i16> %arg0, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll @@ -4,10 +4,15 @@ define <2 x i16> @uadd_sat_v9i16_combine_vi16(<9 x i16> %arg0, <9 x i16> %arg1) { ; CHECK-LABEL: @uadd_sat_v9i16_combine_vi16( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x i16> [[ARG0:%.*]], <9 x i16> poison, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <9 x i16> [[ARG1:%.*]], <9 x i16> poison, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) -; CHECK-NEXT: ret <2 x i16> [[TMP2]] +; CHECK-NEXT: [[ARG0_1:%.*]] = extractelement <9 x i16> undef, i64 7 +; CHECK-NEXT: [[ARG0_2:%.*]] = extractelement <9 x i16> [[ARG0:%.*]], i64 8 +; CHECK-NEXT: [[ARG1_1:%.*]] = extractelement <9 x i16> [[ARG1:%.*]], i64 7 +; CHECK-NEXT: [[ARG1_2:%.*]] = extractelement <9 x i16> [[ARG1]], i64 8 +; CHECK-NEXT: [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) +; CHECK-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]]) +; CHECK-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> undef, i16 [[ADD_1]], i64 0 +; CHECK-NEXT: [[INS_2:%.*]] = insertelement <2 x i16> [[INS_1]], i16 [[ADD_2]], i64 1 +; CHECK-NEXT: ret <2 x i16> [[INS_2]] ; bb: %arg0.1 = extractelement <9 x i16> undef, i64 7 diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll @@ -4,20 +4,23 @@ define <4 x half> @phis(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) { ; CHECK-LABEL: @phis( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1:%.*]], i64 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3 +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> ; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]] ; CHECK: bb0: -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2:%.*]], i64 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> ; CHECK-NEXT: br label [[BB1]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ] -; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> [[TMP5]], <4 x i32> -; CHECK-NEXT: ret <4 x half> [[TMP8]] +; CHECK-NEXT: [[C2:%.*]] = phi half [ [[A2]], [[ENTRY:%.*]] ], [ [[B2]], [[BB0]] ] +; CHECK-NEXT: [[C3:%.*]] = phi half [ [[A3]], [[ENTRY]] ], [ [[B3]], [[BB0]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[BB0]] ] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x half> [[TMP2]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[O2:%.*]] = insertelement <4 x half> [[TMP3]], half [[C2]], i64 2 +; CHECK-NEXT: [[O3:%.*]] = insertelement <4 x half> [[O2]], half [[C3]], i64 3 +; CHECK-NEXT: ret <4 x half> [[O3]] ; entry: %a0 = extractelement <4 x half> %in1, i64 0 @@ -49,20 +52,23 @@ define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) { ; CHECK-LABEL: @phis_reverse( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1:%.*]], i64 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3 +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> ; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]] ; CHECK: bb0: -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2:%.*]], i64 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> ; CHECK-NEXT: br label [[BB1]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ] -; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x i32> -; CHECK-NEXT: ret <4 x half> [[TMP8]] +; CHECK-NEXT: [[C3:%.*]] = phi half [ [[A3]], [[ENTRY:%.*]] ], [ [[B3]], [[BB0]] ] +; CHECK-NEXT: [[C2:%.*]] = phi half [ [[A2]], [[ENTRY]] ], [ [[B2]], [[BB0]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[BB0]] ] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x half> [[TMP2]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[O2:%.*]] = insertelement <4 x half> [[TMP3]], half [[C2]], i64 2 +; CHECK-NEXT: [[O3:%.*]] = insertelement <4 x half> [[O2]], half [[C3]], i64 3 +; CHECK-NEXT: ret <4 x half> [[O3]] ; entry: %a0 = extractelement <4 x half> %in1, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll --- a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll @@ -155,11 +155,13 @@ ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]]) ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> -; CHECK-NEXT: ret <4 x float> [[VECINS_31]] +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] ; ; DEFAULT-LABEL: define <4 x float> @exp_4x ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { @@ -171,11 +173,13 @@ ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]]) ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) -; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> -; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]] +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -208,11 +212,13 @@ ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]]) ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> -; CHECK-NEXT: ret <4 x float> [[VECINS_31]] +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] ; ; DEFAULT-LABEL: define <4 x float> @int_exp_4x ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { @@ -224,11 +230,13 @@ ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]]) ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) -; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> -; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]] +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -261,11 +269,13 @@ ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]]) ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> -; CHECK-NEXT: ret <4 x float> [[VECINS_31]] +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] ; ; DEFAULT-LABEL: define <4 x float> @log_4x ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { @@ -277,11 +287,13 @@ ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]]) ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) -; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> -; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]] +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -314,11 +326,13 @@ ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]]) ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> -; CHECK-NEXT: ret <4 x float> [[VECINS_31]] +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] ; ; DEFAULT-LABEL: define <4 x float> @int_log_4x ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { @@ -330,11 +344,13 @@ ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]]) ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) -; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> -; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]] +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -367,11 +383,13 @@ ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]]) ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> -; CHECK-NEXT: ret <4 x float> [[VECINS_31]] +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] ; ; DEFAULT-LABEL: define <4 x float> @sin_4x ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { @@ -383,11 +401,13 @@ ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]]) ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) -; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> -; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]] +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -420,11 +440,13 @@ ; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> -; CHECK-NEXT: ret <4 x float> [[VECINS_31]] +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] ; ; DEFAULT-LABEL: define <4 x float> @int_sin_4x ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { @@ -436,11 +458,13 @@ ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> -; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) -; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> -; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]] +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] ; entry: %0 = load <4 x float>, ptr %a, align 16 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll @@ -2,9 +2,9 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 define <8 x float> @ceil_floor(<8 x float> %a) { ; SSE-LABEL: @ceil_floor( @@ -51,24 +51,47 @@ ; ; AVX-LABEL: @ceil_floor( ; AVX-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 +; AVX-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i64 1 +; AVX-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i64 2 ; AVX-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 ; AVX-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) +; AVX-NEXT: [[AB1:%.*]] = call float @llvm.floor.f32(float [[A1]]) +; AVX-NEXT: [[AB2:%.*]] = call float @llvm.floor.f32(float [[A2]]) ; AVX-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; AVX-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP1]]) +; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP3]]) ; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0 -; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> -; AVX-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> -; AVX-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3 -; AVX-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> -; AVX-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> -; AVX-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> -; AVX-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> +; AVX-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i64 1 +; AVX-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i64 2 +; AVX-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i64 3 +; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> +; AVX-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP5]], <8 x i32> +; AVX-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> +; AVX-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP6]], <8 x i32> ; AVX-NEXT: ret <8 x float> [[R71]] +; +; AVX2-LABEL: @ceil_floor( +; AVX2-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 +; AVX2-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 +; AVX2-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) +; AVX2-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX2-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) +; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX2-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) +; AVX2-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0 +; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> +; AVX2-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> +; AVX2-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3 +; AVX2-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> +; AVX2-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> +; AVX2-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> +; AVX2-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> +; AVX2-NEXT: ret <8 x float> [[R71]] ; %a0 = extractelement <8 x float> %a, i32 0 %a1 = extractelement <8 x float> %a, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll @@ -2,9 +2,9 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 define <8 x float> @ceil_floor(<8 x float> %a) { ; SSE-LABEL: @ceil_floor( @@ -51,24 +51,47 @@ ; ; AVX-LABEL: @ceil_floor( ; AVX-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 +; AVX-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i64 1 +; AVX-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i64 2 ; AVX-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 ; AVX-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) +; AVX-NEXT: [[AB1:%.*]] = call float @llvm.floor.f32(float [[A1]]) +; AVX-NEXT: [[AB2:%.*]] = call float @llvm.floor.f32(float [[A2]]) ; AVX-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) -; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) -; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> -; AVX-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP1]]) +; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP3]]) ; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i64 0 -; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> -; AVX-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> -; AVX-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3 -; AVX-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> -; AVX-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> -; AVX-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> -; AVX-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> +; AVX-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i64 1 +; AVX-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i64 2 +; AVX-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i64 3 +; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> +; AVX-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP5]], <8 x i32> +; AVX-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> +; AVX-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP6]], <8 x i32> ; AVX-NEXT: ret <8 x float> [[R71]] +; +; AVX2-LABEL: @ceil_floor( +; AVX2-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0 +; AVX2-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3 +; AVX2-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]]) +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]]) +; AVX2-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]]) +; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX2-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]]) +; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; AVX2-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) +; AVX2-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i64 0 +; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> +; AVX2-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> +; AVX2-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3 +; AVX2-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> +; AVX2-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> +; AVX2-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> +; AVX2-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> +; AVX2-NEXT: ret <8 x float> [[R71]] ; %a0 = extractelement <8 x float> %a, i32 0 %a1 = extractelement <8 x float> %a, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll @@ -167,24 +167,42 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) { ; SSE-LABEL: @test_v4f64( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] -; SSE-NEXT: [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> -; SSE-NEXT: ret <4 x double> [[R031]] +; SSE-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 +; SSE-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 +; SSE-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i64 2 +; SSE-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 +; SSE-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i64 0 +; SSE-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i64 1 +; SSE-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i64 2 +; SSE-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 +; SSE-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] +; SSE-NEXT: [[R1:%.*]] = fadd double [[B0]], [[B1]] +; SSE-NEXT: [[R2:%.*]] = fadd double [[A2]], [[A3]] +; SSE-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] +; SSE-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i64 0 +; SSE-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i64 1 +; SSE-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i64 2 +; SSE-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i64 3 +; SSE-NEXT: ret <4 x double> [[R03]] ; ; SLM-LABEL: @test_v4f64( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SLM-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SLM-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] -; SLM-NEXT: [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> -; SLM-NEXT: ret <4 x double> [[R031]] +; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 +; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 +; SLM-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i64 2 +; SLM-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 +; SLM-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i64 0 +; SLM-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i64 1 +; SLM-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i64 2 +; SLM-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 +; SLM-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] +; SLM-NEXT: [[R1:%.*]] = fadd double [[B0]], [[B1]] +; SLM-NEXT: [[R2:%.*]] = fadd double [[A2]], [[A3]] +; SLM-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] +; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i64 0 +; SLM-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i64 1 +; SLM-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i64 2 +; SLM-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i64 3 +; SLM-NEXT: ret <4 x double> [[R03]] ; ; AVX-LABEL: @test_v4f64( ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> @@ -213,16 +231,29 @@ ; PR50392 define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b) { -; CHECK-LABEL: @test_v4f64_partial_swizzle( -; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2 -; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3 -; CHECK-NEXT: ret <4 x double> [[R03]] +; SSE-LABEL: @test_v4f64_partial_swizzle( +; SSE-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 +; SSE-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 +; SSE-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i64 0 +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; SSE-NEXT: [[R031:%.*]] = shufflevector <4 x double> [[R00]], <4 x double> [[TMP4]], <4 x i32> +; SSE-NEXT: ret <4 x double> [[R031]] +; +; SLM-LABEL: @test_v4f64_partial_swizzle( +; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 +; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 +; SLM-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i64 0 +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; SLM-NEXT: [[R031:%.*]] = shufflevector <4 x double> [[R00]], <4 x double> [[TMP4]], <4 x i32> +; SLM-NEXT: ret <4 x double> [[R031]] ; %a0 = extractelement <4 x double> %a, i64 0 %a1 = extractelement <4 x double> %a, i64 1 @@ -247,13 +278,23 @@ ; SSE-NEXT: ret <8 x float> [[TMP3]] ; ; SLM-LABEL: @test_v8f32( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> -; SLM-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> -; SLM-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]] -; SLM-NEXT: [[R071:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[TMP4]], [[TMP5]] +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[TMP7]], [[TMP8]] +; SLM-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP11:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP12:%.*]] = fadd <2 x float> [[TMP10]], [[TMP11]] +; SLM-NEXT: [[R033:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP6]], <8 x i32> +; SLM-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <8 x i32> +; SLM-NEXT: [[R052:%.*]] = shufflevector <8 x float> [[R033]], <8 x float> [[TMP13]], <8 x i32> +; SLM-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <8 x i32> +; SLM-NEXT: [[R071:%.*]] = shufflevector <8 x float> [[R052]], <8 x float> [[TMP14]], <8 x i32> ; SLM-NEXT: ret <8 x float> [[R071]] ; ; AVX-LABEL: @test_v8f32( @@ -367,14 +408,48 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) { ; SSE-LABEL: @test_v16i16( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> -; SSE-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]] -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> -; SSE-NEXT: [[TMP6:%.*]] = add <8 x i16> [[TMP4]], [[TMP5]] -; SSE-NEXT: [[RV151:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> -; SSE-NEXT: ret <16 x i16> [[RV151]] +; SSE-NEXT: [[B0:%.*]] = extractelement <16 x i16> [[B:%.*]], i64 0 +; SSE-NEXT: [[B1:%.*]] = extractelement <16 x i16> [[B]], i64 1 +; SSE-NEXT: [[B2:%.*]] = extractelement <16 x i16> [[B]], i64 2 +; SSE-NEXT: [[B3:%.*]] = extractelement <16 x i16> [[B]], i64 3 +; SSE-NEXT: [[B4:%.*]] = extractelement <16 x i16> [[B]], i64 4 +; SSE-NEXT: [[B5:%.*]] = extractelement <16 x i16> [[B]], i64 5 +; SSE-NEXT: [[B6:%.*]] = extractelement <16 x i16> [[B]], i64 6 +; SSE-NEXT: [[B7:%.*]] = extractelement <16 x i16> [[B]], i64 7 +; SSE-NEXT: [[B8:%.*]] = extractelement <16 x i16> [[B]], i64 8 +; SSE-NEXT: [[B9:%.*]] = extractelement <16 x i16> [[B]], i64 9 +; SSE-NEXT: [[B10:%.*]] = extractelement <16 x i16> [[B]], i64 10 +; SSE-NEXT: [[B11:%.*]] = extractelement <16 x i16> [[B]], i64 11 +; SSE-NEXT: [[B12:%.*]] = extractelement <16 x i16> [[B]], i64 12 +; SSE-NEXT: [[B13:%.*]] = extractelement <16 x i16> [[B]], i64 13 +; SSE-NEXT: [[B14:%.*]] = extractelement <16 x i16> [[B]], i64 14 +; SSE-NEXT: [[B15:%.*]] = extractelement <16 x i16> [[B]], i64 15 +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> poison, <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = add <4 x i16> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[R4:%.*]] = add i16 [[B0]], [[B1]] +; SSE-NEXT: [[R5:%.*]] = add i16 [[B2]], [[B3]] +; SSE-NEXT: [[R6:%.*]] = add i16 [[B4]], [[B5]] +; SSE-NEXT: [[R7:%.*]] = add i16 [[B6]], [[B7]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> +; SSE-NEXT: [[TMP6:%.*]] = add <4 x i16> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[R12:%.*]] = add i16 [[B8]], [[B9]] +; SSE-NEXT: [[R13:%.*]] = add i16 [[B10]], [[B11]] +; SSE-NEXT: [[R14:%.*]] = add i16 [[B12]], [[B13]] +; SSE-NEXT: [[R15:%.*]] = add i16 [[B14]], [[B15]] +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <16 x i32> +; SSE-NEXT: [[RV4:%.*]] = insertelement <16 x i16> [[TMP7]], i16 [[R4]], i64 4 +; SSE-NEXT: [[RV5:%.*]] = insertelement <16 x i16> [[RV4]], i16 [[R5]], i64 5 +; SSE-NEXT: [[RV6:%.*]] = insertelement <16 x i16> [[RV5]], i16 [[R6]], i64 6 +; SSE-NEXT: [[RV7:%.*]] = insertelement <16 x i16> [[RV6]], i16 [[R7]], i64 7 +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> poison, <16 x i32> +; SSE-NEXT: [[RV111:%.*]] = shufflevector <16 x i16> [[RV7]], <16 x i16> [[TMP8]], <16 x i32> +; SSE-NEXT: [[RV12:%.*]] = insertelement <16 x i16> [[RV111]], i16 [[R12]], i64 12 +; SSE-NEXT: [[RV13:%.*]] = insertelement <16 x i16> [[RV12]], i16 [[R13]], i64 13 +; SSE-NEXT: [[RV14:%.*]] = insertelement <16 x i16> [[RV13]], i16 [[R14]], i64 14 +; SSE-NEXT: [[RV15:%.*]] = insertelement <16 x i16> [[RV14]], i16 [[R15]], i64 15 +; SSE-NEXT: ret <16 x i16> [[RV15]] ; ; SLM-LABEL: @test_v16i16( ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll @@ -167,24 +167,42 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) { ; SSE-LABEL: @test_v4f64( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] -; SSE-NEXT: [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> -; SSE-NEXT: ret <4 x double> [[R031]] +; SSE-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 +; SSE-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 +; SSE-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i64 2 +; SSE-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 +; SSE-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i64 0 +; SSE-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i64 1 +; SSE-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i64 2 +; SSE-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 +; SSE-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] +; SSE-NEXT: [[R1:%.*]] = fadd double [[B0]], [[B1]] +; SSE-NEXT: [[R2:%.*]] = fadd double [[A2]], [[A3]] +; SSE-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] +; SSE-NEXT: [[R00:%.*]] = insertelement <4 x double> undef, double [[R0]], i64 0 +; SSE-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i64 1 +; SSE-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i64 2 +; SSE-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i64 3 +; SSE-NEXT: ret <4 x double> [[R03]] ; ; SLM-LABEL: @test_v4f64( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SLM-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SLM-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] -; SLM-NEXT: [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> -; SLM-NEXT: ret <4 x double> [[R031]] +; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 +; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 +; SLM-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i64 2 +; SLM-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 +; SLM-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i64 0 +; SLM-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i64 1 +; SLM-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i64 2 +; SLM-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 +; SLM-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] +; SLM-NEXT: [[R1:%.*]] = fadd double [[B0]], [[B1]] +; SLM-NEXT: [[R2:%.*]] = fadd double [[A2]], [[A3]] +; SLM-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] +; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> undef, double [[R0]], i64 0 +; SLM-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i64 1 +; SLM-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i64 2 +; SLM-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i64 3 +; SLM-NEXT: ret <4 x double> [[R03]] ; ; AVX-LABEL: @test_v4f64( ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> @@ -213,16 +231,29 @@ ; PR50392 define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b) { -; CHECK-LABEL: @test_v4f64_partial_swizzle( -; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2 -; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> undef, <4 x i32> -; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3 -; CHECK-NEXT: ret <4 x double> [[R03]] +; SSE-LABEL: @test_v4f64_partial_swizzle( +; SSE-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 +; SSE-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 +; SSE-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[R00:%.*]] = insertelement <4 x double> undef, double [[R0]], i64 0 +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; SSE-NEXT: [[R031:%.*]] = shufflevector <4 x double> [[R00]], <4 x double> [[TMP4]], <4 x i32> +; SSE-NEXT: ret <4 x double> [[R031]] +; +; SLM-LABEL: @test_v4f64_partial_swizzle( +; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 +; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 +; SLM-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> undef, double [[R0]], i64 0 +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; SLM-NEXT: [[R031:%.*]] = shufflevector <4 x double> [[R00]], <4 x double> [[TMP4]], <4 x i32> +; SLM-NEXT: ret <4 x double> [[R031]] ; %a0 = extractelement <4 x double> %a, i64 0 %a1 = extractelement <4 x double> %a, i64 1 @@ -247,13 +278,23 @@ ; SSE-NEXT: ret <8 x float> [[TMP3]] ; ; SLM-LABEL: @test_v8f32( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> -; SLM-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> -; SLM-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]] -; SLM-NEXT: [[R071:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[TMP4]], [[TMP5]] +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[TMP7]], [[TMP8]] +; SLM-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP11:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP12:%.*]] = fadd <2 x float> [[TMP10]], [[TMP11]] +; SLM-NEXT: [[R033:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP6]], <8 x i32> +; SLM-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <8 x i32> +; SLM-NEXT: [[R052:%.*]] = shufflevector <8 x float> [[R033]], <8 x float> [[TMP13]], <8 x i32> +; SLM-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <8 x i32> +; SLM-NEXT: [[R071:%.*]] = shufflevector <8 x float> [[R052]], <8 x float> [[TMP14]], <8 x i32> ; SLM-NEXT: ret <8 x float> [[R071]] ; ; AVX-LABEL: @test_v8f32( @@ -367,14 +408,48 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) { ; SSE-LABEL: @test_v16i16( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> -; SSE-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]] -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> -; SSE-NEXT: [[TMP6:%.*]] = add <8 x i16> [[TMP4]], [[TMP5]] -; SSE-NEXT: [[RV151:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> -; SSE-NEXT: ret <16 x i16> [[RV151]] +; SSE-NEXT: [[B0:%.*]] = extractelement <16 x i16> [[B:%.*]], i64 0 +; SSE-NEXT: [[B1:%.*]] = extractelement <16 x i16> [[B]], i64 1 +; SSE-NEXT: [[B2:%.*]] = extractelement <16 x i16> [[B]], i64 2 +; SSE-NEXT: [[B3:%.*]] = extractelement <16 x i16> [[B]], i64 3 +; SSE-NEXT: [[B4:%.*]] = extractelement <16 x i16> [[B]], i64 4 +; SSE-NEXT: [[B5:%.*]] = extractelement <16 x i16> [[B]], i64 5 +; SSE-NEXT: [[B6:%.*]] = extractelement <16 x i16> [[B]], i64 6 +; SSE-NEXT: [[B7:%.*]] = extractelement <16 x i16> [[B]], i64 7 +; SSE-NEXT: [[B8:%.*]] = extractelement <16 x i16> [[B]], i64 8 +; SSE-NEXT: [[B9:%.*]] = extractelement <16 x i16> [[B]], i64 9 +; SSE-NEXT: [[B10:%.*]] = extractelement <16 x i16> [[B]], i64 10 +; SSE-NEXT: [[B11:%.*]] = extractelement <16 x i16> [[B]], i64 11 +; SSE-NEXT: [[B12:%.*]] = extractelement <16 x i16> [[B]], i64 12 +; SSE-NEXT: [[B13:%.*]] = extractelement <16 x i16> [[B]], i64 13 +; SSE-NEXT: [[B14:%.*]] = extractelement <16 x i16> [[B]], i64 14 +; SSE-NEXT: [[B15:%.*]] = extractelement <16 x i16> [[B]], i64 15 +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> poison, <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = add <4 x i16> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[R4:%.*]] = add i16 [[B0]], [[B1]] +; SSE-NEXT: [[R5:%.*]] = add i16 [[B2]], [[B3]] +; SSE-NEXT: [[R6:%.*]] = add i16 [[B4]], [[B5]] +; SSE-NEXT: [[R7:%.*]] = add i16 [[B6]], [[B7]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> +; SSE-NEXT: [[TMP6:%.*]] = add <4 x i16> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[R12:%.*]] = add i16 [[B8]], [[B9]] +; SSE-NEXT: [[R13:%.*]] = add i16 [[B10]], [[B11]] +; SSE-NEXT: [[R14:%.*]] = add i16 [[B12]], [[B13]] +; SSE-NEXT: [[R15:%.*]] = add i16 [[B14]], [[B15]] +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> undef, <16 x i32> +; SSE-NEXT: [[RV4:%.*]] = insertelement <16 x i16> [[TMP7]], i16 [[R4]], i64 4 +; SSE-NEXT: [[RV5:%.*]] = insertelement <16 x i16> [[RV4]], i16 [[R5]], i64 5 +; SSE-NEXT: [[RV6:%.*]] = insertelement <16 x i16> [[RV5]], i16 [[R6]], i64 6 +; SSE-NEXT: [[RV7:%.*]] = insertelement <16 x i16> [[RV6]], i16 [[R7]], i64 7 +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> poison, <16 x i32> +; SSE-NEXT: [[RV111:%.*]] = shufflevector <16 x i16> [[RV7]], <16 x i16> [[TMP8]], <16 x i32> +; SSE-NEXT: [[RV12:%.*]] = insertelement <16 x i16> [[RV111]], i16 [[R12]], i64 12 +; SSE-NEXT: [[RV13:%.*]] = insertelement <16 x i16> [[RV12]], i16 [[R13]], i64 13 +; SSE-NEXT: [[RV14:%.*]] = insertelement <16 x i16> [[RV13]], i16 [[R14]], i64 14 +; SSE-NEXT: [[RV15:%.*]] = insertelement <16 x i16> [[RV14]], i16 [[R15]], i64 15 +; SSE-NEXT: ret <16 x i16> [[RV15]] ; ; SLM-LABEL: @test_v16i16( ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll @@ -146,24 +146,42 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) { ; SSE-LABEL: @test_v4f64( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SSE-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SSE-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]] -; SSE-NEXT: [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> -; SSE-NEXT: ret <4 x double> [[R031]] +; SSE-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 +; SSE-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 +; SSE-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i64 2 +; SSE-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 +; SSE-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i64 0 +; SSE-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i64 1 +; SSE-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i64 2 +; SSE-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 +; SSE-NEXT: [[R0:%.*]] = fsub double [[A0]], [[A1]] +; SSE-NEXT: [[R1:%.*]] = fsub double [[B0]], [[B1]] +; SSE-NEXT: [[R2:%.*]] = fsub double [[A2]], [[A3]] +; SSE-NEXT: [[R3:%.*]] = fsub double [[B2]], [[B3]] +; SSE-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i64 0 +; SSE-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i64 1 +; SSE-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i64 2 +; SSE-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i64 3 +; SSE-NEXT: ret <4 x double> [[R03]] ; ; SLM-LABEL: @test_v4f64( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SLM-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SLM-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]] -; SLM-NEXT: [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> -; SLM-NEXT: ret <4 x double> [[R031]] +; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 +; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 +; SLM-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i64 2 +; SLM-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 +; SLM-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i64 0 +; SLM-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i64 1 +; SLM-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i64 2 +; SLM-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 +; SLM-NEXT: [[R0:%.*]] = fsub double [[A0]], [[A1]] +; SLM-NEXT: [[R1:%.*]] = fsub double [[B0]], [[B1]] +; SLM-NEXT: [[R2:%.*]] = fsub double [[A2]], [[A3]] +; SLM-NEXT: [[R3:%.*]] = fsub double [[B2]], [[B3]] +; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> poison, double [[R0]], i64 0 +; SLM-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i64 1 +; SLM-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i64 2 +; SLM-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i64 3 +; SLM-NEXT: ret <4 x double> [[R03]] ; ; AVX-LABEL: @test_v4f64( ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> @@ -198,13 +216,23 @@ ; SSE-NEXT: ret <8 x float> [[TMP3]] ; ; SLM-LABEL: @test_v8f32( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> -; SLM-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> -; SLM-NEXT: [[TMP6:%.*]] = fsub <4 x float> [[TMP4]], [[TMP5]] -; SLM-NEXT: [[R071:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fsub <2 x float> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP6:%.*]] = fsub <2 x float> [[TMP4]], [[TMP5]] +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP9:%.*]] = fsub <2 x float> [[TMP7]], [[TMP8]] +; SLM-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP11:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP12:%.*]] = fsub <2 x float> [[TMP10]], [[TMP11]] +; SLM-NEXT: [[R033:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP6]], <8 x i32> +; SLM-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <8 x i32> +; SLM-NEXT: [[R052:%.*]] = shufflevector <8 x float> [[R033]], <8 x float> [[TMP13]], <8 x i32> +; SLM-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <8 x i32> +; SLM-NEXT: [[R071:%.*]] = shufflevector <8 x float> [[R052]], <8 x float> [[TMP14]], <8 x i32> ; SLM-NEXT: ret <8 x float> [[R071]] ; ; AVX-LABEL: @test_v8f32( @@ -318,14 +346,48 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) { ; SSE-LABEL: @test_v16i16( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> -; SSE-NEXT: [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]] -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> -; SSE-NEXT: [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]] -; SSE-NEXT: [[RV151:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> -; SSE-NEXT: ret <16 x i16> [[RV151]] +; SSE-NEXT: [[B0:%.*]] = extractelement <16 x i16> [[B:%.*]], i64 0 +; SSE-NEXT: [[B1:%.*]] = extractelement <16 x i16> [[B]], i64 1 +; SSE-NEXT: [[B2:%.*]] = extractelement <16 x i16> [[B]], i64 2 +; SSE-NEXT: [[B3:%.*]] = extractelement <16 x i16> [[B]], i64 3 +; SSE-NEXT: [[B4:%.*]] = extractelement <16 x i16> [[B]], i64 4 +; SSE-NEXT: [[B5:%.*]] = extractelement <16 x i16> [[B]], i64 5 +; SSE-NEXT: [[B6:%.*]] = extractelement <16 x i16> [[B]], i64 6 +; SSE-NEXT: [[B7:%.*]] = extractelement <16 x i16> [[B]], i64 7 +; SSE-NEXT: [[B8:%.*]] = extractelement <16 x i16> [[B]], i64 8 +; SSE-NEXT: [[B9:%.*]] = extractelement <16 x i16> [[B]], i64 9 +; SSE-NEXT: [[B10:%.*]] = extractelement <16 x i16> [[B]], i64 10 +; SSE-NEXT: [[B11:%.*]] = extractelement <16 x i16> [[B]], i64 11 +; SSE-NEXT: [[B12:%.*]] = extractelement <16 x i16> [[B]], i64 12 +; SSE-NEXT: [[B13:%.*]] = extractelement <16 x i16> [[B]], i64 13 +; SSE-NEXT: [[B14:%.*]] = extractelement <16 x i16> [[B]], i64 14 +; SSE-NEXT: [[B15:%.*]] = extractelement <16 x i16> [[B]], i64 15 +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> poison, <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = sub <4 x i16> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[R4:%.*]] = sub i16 [[B0]], [[B1]] +; SSE-NEXT: [[R5:%.*]] = sub i16 [[B2]], [[B3]] +; SSE-NEXT: [[R6:%.*]] = sub i16 [[B4]], [[B5]] +; SSE-NEXT: [[R7:%.*]] = sub i16 [[B6]], [[B7]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> +; SSE-NEXT: [[TMP6:%.*]] = sub <4 x i16> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[R12:%.*]] = sub i16 [[B8]], [[B9]] +; SSE-NEXT: [[R13:%.*]] = sub i16 [[B10]], [[B11]] +; SSE-NEXT: [[R14:%.*]] = sub i16 [[B12]], [[B13]] +; SSE-NEXT: [[R15:%.*]] = sub i16 [[B14]], [[B15]] +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <16 x i32> +; SSE-NEXT: [[RV4:%.*]] = insertelement <16 x i16> [[TMP7]], i16 [[R4]], i64 4 +; SSE-NEXT: [[RV5:%.*]] = insertelement <16 x i16> [[RV4]], i16 [[R5]], i64 5 +; SSE-NEXT: [[RV6:%.*]] = insertelement <16 x i16> [[RV5]], i16 [[R6]], i64 6 +; SSE-NEXT: [[RV7:%.*]] = insertelement <16 x i16> [[RV6]], i16 [[R7]], i64 7 +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> poison, <16 x i32> +; SSE-NEXT: [[RV111:%.*]] = shufflevector <16 x i16> [[RV7]], <16 x i16> [[TMP8]], <16 x i32> +; SSE-NEXT: [[RV12:%.*]] = insertelement <16 x i16> [[RV111]], i16 [[R12]], i64 12 +; SSE-NEXT: [[RV13:%.*]] = insertelement <16 x i16> [[RV12]], i16 [[R13]], i64 13 +; SSE-NEXT: [[RV14:%.*]] = insertelement <16 x i16> [[RV13]], i16 [[R14]], i64 14 +; SSE-NEXT: [[RV15:%.*]] = insertelement <16 x i16> [[RV14]], i16 [[R15]], i64 15 +; SSE-NEXT: ret <16 x i16> [[RV15]] ; ; SLM-LABEL: @test_v16i16( ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll @@ -146,24 +146,42 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) { ; SSE-LABEL: @test_v4f64( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SSE-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SSE-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]] -; SSE-NEXT: [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> -; SSE-NEXT: ret <4 x double> [[R031]] +; SSE-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 +; SSE-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 +; SSE-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i64 2 +; SSE-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 +; SSE-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i64 0 +; SSE-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i64 1 +; SSE-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i64 2 +; SSE-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 +; SSE-NEXT: [[R0:%.*]] = fsub double [[A0]], [[A1]] +; SSE-NEXT: [[R1:%.*]] = fsub double [[B0]], [[B1]] +; SSE-NEXT: [[R2:%.*]] = fsub double [[A2]], [[A3]] +; SSE-NEXT: [[R3:%.*]] = fsub double [[B2]], [[B3]] +; SSE-NEXT: [[R00:%.*]] = insertelement <4 x double> undef, double [[R0]], i64 0 +; SSE-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i64 1 +; SSE-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i64 2 +; SSE-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i64 3 +; SSE-NEXT: ret <4 x double> [[R03]] ; ; SLM-LABEL: @test_v4f64( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SLM-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; SLM-NEXT: [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]] -; SLM-NEXT: [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> -; SLM-NEXT: ret <4 x double> [[R031]] +; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 +; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 +; SLM-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i64 2 +; SLM-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 +; SLM-NEXT: [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i64 0 +; SLM-NEXT: [[B1:%.*]] = extractelement <4 x double> [[B]], i64 1 +; SLM-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B]], i64 2 +; SLM-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 +; SLM-NEXT: [[R0:%.*]] = fsub double [[A0]], [[A1]] +; SLM-NEXT: [[R1:%.*]] = fsub double [[B0]], [[B1]] +; SLM-NEXT: [[R2:%.*]] = fsub double [[A2]], [[A3]] +; SLM-NEXT: [[R3:%.*]] = fsub double [[B2]], [[B3]] +; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> undef, double [[R0]], i64 0 +; SLM-NEXT: [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i64 1 +; SLM-NEXT: [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i64 2 +; SLM-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i64 3 +; SLM-NEXT: ret <4 x double> [[R03]] ; ; AVX-LABEL: @test_v4f64( ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> @@ -198,13 +216,23 @@ ; SSE-NEXT: ret <8 x float> [[TMP3]] ; ; SLM-LABEL: @test_v8f32( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> -; SLM-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> -; SLM-NEXT: [[TMP6:%.*]] = fsub <4 x float> [[TMP4]], [[TMP5]] -; SLM-NEXT: [[R071:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fsub <2 x float> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP6:%.*]] = fsub <2 x float> [[TMP4]], [[TMP5]] +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP9:%.*]] = fsub <2 x float> [[TMP7]], [[TMP8]] +; SLM-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP11:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> +; SLM-NEXT: [[TMP12:%.*]] = fsub <2 x float> [[TMP10]], [[TMP11]] +; SLM-NEXT: [[R033:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP6]], <8 x i32> +; SLM-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <8 x i32> +; SLM-NEXT: [[R052:%.*]] = shufflevector <8 x float> [[R033]], <8 x float> [[TMP13]], <8 x i32> +; SLM-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <8 x i32> +; SLM-NEXT: [[R071:%.*]] = shufflevector <8 x float> [[R052]], <8 x float> [[TMP14]], <8 x i32> ; SLM-NEXT: ret <8 x float> [[R071]] ; ; AVX-LABEL: @test_v8f32( @@ -318,14 +346,48 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) { ; SSE-LABEL: @test_v16i16( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> -; SSE-NEXT: [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]] -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> -; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> -; SSE-NEXT: [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]] -; SSE-NEXT: [[RV151:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> -; SSE-NEXT: ret <16 x i16> [[RV151]] +; SSE-NEXT: [[B0:%.*]] = extractelement <16 x i16> [[B:%.*]], i64 0 +; SSE-NEXT: [[B1:%.*]] = extractelement <16 x i16> [[B]], i64 1 +; SSE-NEXT: [[B2:%.*]] = extractelement <16 x i16> [[B]], i64 2 +; SSE-NEXT: [[B3:%.*]] = extractelement <16 x i16> [[B]], i64 3 +; SSE-NEXT: [[B4:%.*]] = extractelement <16 x i16> [[B]], i64 4 +; SSE-NEXT: [[B5:%.*]] = extractelement <16 x i16> [[B]], i64 5 +; SSE-NEXT: [[B6:%.*]] = extractelement <16 x i16> [[B]], i64 6 +; SSE-NEXT: [[B7:%.*]] = extractelement <16 x i16> [[B]], i64 7 +; SSE-NEXT: [[B8:%.*]] = extractelement <16 x i16> [[B]], i64 8 +; SSE-NEXT: [[B9:%.*]] = extractelement <16 x i16> [[B]], i64 9 +; SSE-NEXT: [[B10:%.*]] = extractelement <16 x i16> [[B]], i64 10 +; SSE-NEXT: [[B11:%.*]] = extractelement <16 x i16> [[B]], i64 11 +; SSE-NEXT: [[B12:%.*]] = extractelement <16 x i16> [[B]], i64 12 +; SSE-NEXT: [[B13:%.*]] = extractelement <16 x i16> [[B]], i64 13 +; SSE-NEXT: [[B14:%.*]] = extractelement <16 x i16> [[B]], i64 14 +; SSE-NEXT: [[B15:%.*]] = extractelement <16 x i16> [[B]], i64 15 +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> poison, <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = sub <4 x i16> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[R4:%.*]] = sub i16 [[B0]], [[B1]] +; SSE-NEXT: [[R5:%.*]] = sub i16 [[B2]], [[B3]] +; SSE-NEXT: [[R6:%.*]] = sub i16 [[B4]], [[B5]] +; SSE-NEXT: [[R7:%.*]] = sub i16 [[B6]], [[B7]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <4 x i32> +; SSE-NEXT: [[TMP6:%.*]] = sub <4 x i16> [[TMP4]], [[TMP5]] +; SSE-NEXT: [[R12:%.*]] = sub i16 [[B8]], [[B9]] +; SSE-NEXT: [[R13:%.*]] = sub i16 [[B10]], [[B11]] +; SSE-NEXT: [[R14:%.*]] = sub i16 [[B12]], [[B13]] +; SSE-NEXT: [[R15:%.*]] = sub i16 [[B14]], [[B15]] +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> undef, <16 x i32> +; SSE-NEXT: [[RV4:%.*]] = insertelement <16 x i16> [[TMP7]], i16 [[R4]], i64 4 +; SSE-NEXT: [[RV5:%.*]] = insertelement <16 x i16> [[RV4]], i16 [[R5]], i64 5 +; SSE-NEXT: [[RV6:%.*]] = insertelement <16 x i16> [[RV5]], i16 [[R6]], i64 6 +; SSE-NEXT: [[RV7:%.*]] = insertelement <16 x i16> [[RV6]], i16 [[R7]], i64 7 +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> poison, <16 x i32> +; SSE-NEXT: [[RV111:%.*]] = shufflevector <16 x i16> [[RV7]], <16 x i16> [[TMP8]], <16 x i32> +; SSE-NEXT: [[RV12:%.*]] = insertelement <16 x i16> [[RV111]], i16 [[R12]], i64 12 +; SSE-NEXT: [[RV13:%.*]] = insertelement <16 x i16> [[RV12]], i16 [[R13]], i64 13 +; SSE-NEXT: [[RV14:%.*]] = insertelement <16 x i16> [[RV13]], i16 [[R14]], i64 14 +; SSE-NEXT: [[RV15:%.*]] = insertelement <16 x i16> [[RV14]], i16 [[R15]], i64 15 +; SSE-NEXT: ret <16 x i16> [[RV15]] ; ; SLM-LABEL: @test_v16i16( ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-transpose.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-transpose.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-transpose.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-transpose.ll @@ -28,15 +28,13 @@ ; ; SSE42-LABEL: @reduce_and4( ; SSE42-NEXT: entry: -; SSE42-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[V4:%.*]]) -; SSE42-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[V3:%.*]]) -; SSE42-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP0]], [[TMP1]] -; SSE42-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[V2:%.*]]) -; SSE42-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[TMP2]] -; SSE42-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[V1:%.*]]) -; SSE42-NEXT: [[OP_RDX2:%.*]] = and i32 [[OP_RDX1]], [[TMP3]] -; SSE42-NEXT: [[OP_RDX3:%.*]] = and i32 [[OP_RDX2]], [[ACC:%.*]] -; SSE42-NEXT: ret i32 [[OP_RDX3]] +; SSE42-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> +; SSE42-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> +; SSE42-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]]) +; SSE42-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]]) +; SSE42-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP3]] +; SSE42-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]] +; SSE42-NEXT: ret i32 [[OP_RDX1]] ; ; AVX-LABEL: @reduce_and4( ; AVX-NEXT: entry: @@ -103,15 +101,13 @@ ; SSE2-NEXT: ret i32 [[OP_RDX1]] ; ; SSE42-LABEL: @reduce_and4_transpose( -; SSE42-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[V4:%.*]]) -; SSE42-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[V3:%.*]]) -; SSE42-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP1]], [[TMP2]] -; SSE42-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[V2:%.*]]) -; SSE42-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[TMP3]] -; SSE42-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[V1:%.*]]) -; SSE42-NEXT: [[OP_RDX2:%.*]] = and i32 [[OP_RDX1]], [[TMP4]] -; SSE42-NEXT: [[OP_RDX3:%.*]] = and i32 [[OP_RDX2]], [[ACC:%.*]] -; SSE42-NEXT: ret i32 [[OP_RDX3]] +; SSE42-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> +; SSE42-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> +; SSE42-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]]) +; SSE42-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]]) +; SSE42-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP3]], [[TMP4]] +; SSE42-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]] +; SSE42-NEXT: ret i32 [[OP_RDX1]] ; ; AVX-LABEL: @reduce_and4_transpose( ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> diff --git a/llvm/unittests/IR/InstructionsTest.cpp b/llvm/unittests/IR/InstructionsTest.cpp --- a/llvm/unittests/IR/InstructionsTest.cpp +++ b/llvm/unittests/IR/InstructionsTest.cpp @@ -1024,71 +1024,141 @@ Constant *C7 = ConstantInt::get(Int32Ty, 7); Constant *Identity = ConstantVector::get({C0, CU, C2, C3, C4}); - EXPECT_TRUE(ShuffleVectorInst::isIdentityMask(Identity)); - EXPECT_FALSE(ShuffleVectorInst::isSelectMask(Identity)); // identity is distinguished from select - EXPECT_FALSE(ShuffleVectorInst::isReverseMask(Identity)); - EXPECT_TRUE(ShuffleVectorInst::isSingleSourceMask(Identity)); // identity is always single source - EXPECT_FALSE(ShuffleVectorInst::isZeroEltSplatMask(Identity)); - EXPECT_FALSE(ShuffleVectorInst::isTransposeMask(Identity)); + EXPECT_TRUE(ShuffleVectorInst::isIdentityMask( + Identity, cast(Identity->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isSelectMask( + Identity, + cast(Identity->getType()) + ->getNumElements())); // identity is distinguished from select + EXPECT_FALSE(ShuffleVectorInst::isReverseMask( + Identity, cast(Identity->getType())->getNumElements())); + EXPECT_TRUE(ShuffleVectorInst::isSingleSourceMask( + Identity, cast(Identity->getType()) + ->getNumElements())); // identity is always single source + EXPECT_FALSE(ShuffleVectorInst::isZeroEltSplatMask( + Identity, cast(Identity->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isTransposeMask( + Identity, cast(Identity->getType())->getNumElements())); Constant *Select = ConstantVector::get({CU, C1, C5}); - EXPECT_FALSE(ShuffleVectorInst::isIdentityMask(Select)); - EXPECT_TRUE(ShuffleVectorInst::isSelectMask(Select)); - EXPECT_FALSE(ShuffleVectorInst::isReverseMask(Select)); - EXPECT_FALSE(ShuffleVectorInst::isSingleSourceMask(Select)); - EXPECT_FALSE(ShuffleVectorInst::isZeroEltSplatMask(Select)); - EXPECT_FALSE(ShuffleVectorInst::isTransposeMask(Select)); - + EXPECT_FALSE(ShuffleVectorInst::isIdentityMask( + Select, cast(Select->getType())->getNumElements())); + EXPECT_TRUE(ShuffleVectorInst::isSelectMask( + Select, cast(Select->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isReverseMask( + Select, cast(Select->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isSingleSourceMask( + Select, cast(Select->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isZeroEltSplatMask( + Select, cast(Select->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isTransposeMask( + Select, cast(Select->getType())->getNumElements())); + Constant *Reverse = ConstantVector::get({C3, C2, C1, CU}); - EXPECT_FALSE(ShuffleVectorInst::isIdentityMask(Reverse)); - EXPECT_FALSE(ShuffleVectorInst::isSelectMask(Reverse)); - EXPECT_TRUE(ShuffleVectorInst::isReverseMask(Reverse)); - EXPECT_TRUE(ShuffleVectorInst::isSingleSourceMask(Reverse)); // reverse is always single source - EXPECT_FALSE(ShuffleVectorInst::isZeroEltSplatMask(Reverse)); - EXPECT_FALSE(ShuffleVectorInst::isTransposeMask(Reverse)); + EXPECT_FALSE(ShuffleVectorInst::isIdentityMask( + Reverse, cast(Reverse->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isSelectMask( + Reverse, cast(Reverse->getType())->getNumElements())); + EXPECT_TRUE(ShuffleVectorInst::isReverseMask( + Reverse, cast(Reverse->getType())->getNumElements())); + EXPECT_TRUE(ShuffleVectorInst::isSingleSourceMask( + Reverse, cast(Reverse->getType()) + ->getNumElements())); // reverse is always single source + EXPECT_FALSE(ShuffleVectorInst::isZeroEltSplatMask( + Reverse, cast(Reverse->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isTransposeMask( + Reverse, cast(Reverse->getType())->getNumElements())); Constant *SingleSource = ConstantVector::get({C2, C2, C0, CU}); - EXPECT_FALSE(ShuffleVectorInst::isIdentityMask(SingleSource)); - EXPECT_FALSE(ShuffleVectorInst::isSelectMask(SingleSource)); - EXPECT_FALSE(ShuffleVectorInst::isReverseMask(SingleSource)); - EXPECT_TRUE(ShuffleVectorInst::isSingleSourceMask(SingleSource)); - EXPECT_FALSE(ShuffleVectorInst::isZeroEltSplatMask(SingleSource)); - EXPECT_FALSE(ShuffleVectorInst::isTransposeMask(SingleSource)); + EXPECT_FALSE(ShuffleVectorInst::isIdentityMask( + SingleSource, + cast(SingleSource->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isSelectMask( + SingleSource, + cast(SingleSource->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isReverseMask( + SingleSource, + cast(SingleSource->getType())->getNumElements())); + EXPECT_TRUE(ShuffleVectorInst::isSingleSourceMask( + SingleSource, + cast(SingleSource->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isZeroEltSplatMask( + SingleSource, + cast(SingleSource->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isTransposeMask( + SingleSource, + cast(SingleSource->getType())->getNumElements())); Constant *ZeroEltSplat = ConstantVector::get({C0, C0, CU, C0}); - EXPECT_FALSE(ShuffleVectorInst::isIdentityMask(ZeroEltSplat)); - EXPECT_FALSE(ShuffleVectorInst::isSelectMask(ZeroEltSplat)); - EXPECT_FALSE(ShuffleVectorInst::isReverseMask(ZeroEltSplat)); - EXPECT_TRUE(ShuffleVectorInst::isSingleSourceMask(ZeroEltSplat)); // 0-splat is always single source - EXPECT_TRUE(ShuffleVectorInst::isZeroEltSplatMask(ZeroEltSplat)); - EXPECT_FALSE(ShuffleVectorInst::isTransposeMask(ZeroEltSplat)); + EXPECT_FALSE(ShuffleVectorInst::isIdentityMask( + ZeroEltSplat, + cast(ZeroEltSplat->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isSelectMask( + ZeroEltSplat, + cast(ZeroEltSplat->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isReverseMask( + ZeroEltSplat, + cast(ZeroEltSplat->getType())->getNumElements())); + EXPECT_TRUE(ShuffleVectorInst::isSingleSourceMask( + ZeroEltSplat, cast(ZeroEltSplat->getType()) + ->getNumElements())); // 0-splat is always single source + EXPECT_TRUE(ShuffleVectorInst::isZeroEltSplatMask( + ZeroEltSplat, + cast(ZeroEltSplat->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isTransposeMask( + ZeroEltSplat, + cast(ZeroEltSplat->getType())->getNumElements())); Constant *Transpose = ConstantVector::get({C0, C4, C2, C6}); - EXPECT_FALSE(ShuffleVectorInst::isIdentityMask(Transpose)); - EXPECT_FALSE(ShuffleVectorInst::isSelectMask(Transpose)); - EXPECT_FALSE(ShuffleVectorInst::isReverseMask(Transpose)); - EXPECT_FALSE(ShuffleVectorInst::isSingleSourceMask(Transpose)); - EXPECT_FALSE(ShuffleVectorInst::isZeroEltSplatMask(Transpose)); - EXPECT_TRUE(ShuffleVectorInst::isTransposeMask(Transpose)); + EXPECT_FALSE(ShuffleVectorInst::isIdentityMask( + Transpose, + cast(Transpose->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isSelectMask( + Transpose, + cast(Transpose->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isReverseMask( + Transpose, + cast(Transpose->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isSingleSourceMask( + Transpose, + cast(Transpose->getType())->getNumElements())); + EXPECT_FALSE(ShuffleVectorInst::isZeroEltSplatMask( + Transpose, + cast(Transpose->getType())->getNumElements())); + EXPECT_TRUE(ShuffleVectorInst::isTransposeMask( + Transpose, + cast(Transpose->getType())->getNumElements())); // More tests to make sure the logic is/stays correct... - EXPECT_TRUE(ShuffleVectorInst::isIdentityMask(ConstantVector::get({CU, C1, CU, C3}))); - EXPECT_TRUE(ShuffleVectorInst::isIdentityMask(ConstantVector::get({C4, CU, C6, CU}))); - - EXPECT_TRUE(ShuffleVectorInst::isSelectMask(ConstantVector::get({C4, C1, C6, CU}))); - EXPECT_TRUE(ShuffleVectorInst::isSelectMask(ConstantVector::get({CU, C1, C6, C3}))); - - EXPECT_TRUE(ShuffleVectorInst::isReverseMask(ConstantVector::get({C7, C6, CU, C4}))); - EXPECT_TRUE(ShuffleVectorInst::isReverseMask(ConstantVector::get({C3, CU, C1, CU}))); - - EXPECT_TRUE(ShuffleVectorInst::isSingleSourceMask(ConstantVector::get({C7, C5, CU, C7}))); - EXPECT_TRUE(ShuffleVectorInst::isSingleSourceMask(ConstantVector::get({C3, C0, CU, C3}))); - - EXPECT_TRUE(ShuffleVectorInst::isZeroEltSplatMask(ConstantVector::get({C4, CU, CU, C4}))); - EXPECT_TRUE(ShuffleVectorInst::isZeroEltSplatMask(ConstantVector::get({CU, C0, CU, C0}))); - - EXPECT_TRUE(ShuffleVectorInst::isTransposeMask(ConstantVector::get({C1, C5, C3, C7}))); - EXPECT_TRUE(ShuffleVectorInst::isTransposeMask(ConstantVector::get({C1, C3}))); + EXPECT_TRUE(ShuffleVectorInst::isIdentityMask( + ConstantVector::get({CU, C1, CU, C3}), 4)); + EXPECT_TRUE(ShuffleVectorInst::isIdentityMask( + ConstantVector::get({C4, CU, C6, CU}), 4)); + + EXPECT_TRUE(ShuffleVectorInst::isSelectMask( + ConstantVector::get({C4, C1, C6, CU}), 4)); + EXPECT_TRUE(ShuffleVectorInst::isSelectMask( + ConstantVector::get({CU, C1, C6, C3}), 4)); + + EXPECT_TRUE(ShuffleVectorInst::isReverseMask( + ConstantVector::get({C7, C6, CU, C4}), 4)); + EXPECT_TRUE(ShuffleVectorInst::isReverseMask( + ConstantVector::get({C3, CU, C1, CU}), 4)); + + EXPECT_TRUE(ShuffleVectorInst::isSingleSourceMask( + ConstantVector::get({C7, C5, CU, C7}), 4)); + EXPECT_TRUE(ShuffleVectorInst::isSingleSourceMask( + ConstantVector::get({C3, C0, CU, C3}), 4)); + + EXPECT_TRUE(ShuffleVectorInst::isZeroEltSplatMask( + ConstantVector::get({C4, CU, CU, C4}), 4)); + EXPECT_TRUE(ShuffleVectorInst::isZeroEltSplatMask( + ConstantVector::get({CU, C0, CU, C0}), 4)); + + EXPECT_TRUE(ShuffleVectorInst::isTransposeMask( + ConstantVector::get({C1, C5, C3, C7}), 4)); + EXPECT_TRUE( + ShuffleVectorInst::isTransposeMask(ConstantVector::get({C1, C3}), 2)); // Nothing special about the values here - just re-using inputs to reduce code. Constant *V0 = ConstantVector::get({C0, C1, C2, C3}); diff --git a/llvm/unittests/IR/ShuffleVectorInstTest.cpp b/llvm/unittests/IR/ShuffleVectorInstTest.cpp --- a/llvm/unittests/IR/ShuffleVectorInstTest.cpp +++ b/llvm/unittests/IR/ShuffleVectorInstTest.cpp @@ -14,51 +14,51 @@ namespace { TEST(ShuffleVectorInst, isIdentityMask) { - ASSERT_TRUE(ShuffleVectorInst::isIdentityMask({0, 1, 2, 3})); - ASSERT_TRUE(ShuffleVectorInst::isIdentityMask({0, 1, 2, 3, -1})); - ASSERT_TRUE(ShuffleVectorInst::isIdentityMask({0, 1, -1, 3})); + ASSERT_TRUE(ShuffleVectorInst::isIdentityMask({0, 1, 2, 3}, 4)); + ASSERT_TRUE(ShuffleVectorInst::isIdentityMask({0, 1, 2, 3, -1}, 5)); + ASSERT_TRUE(ShuffleVectorInst::isIdentityMask({0, 1, -1, 3}, 4)); - ASSERT_FALSE(ShuffleVectorInst::isIdentityMask({0, 1, 2, 4})); - ASSERT_FALSE(ShuffleVectorInst::isIdentityMask({0, -1, 2, 4})); + ASSERT_FALSE(ShuffleVectorInst::isIdentityMask({0, 1, 2, 4}, 4)); + ASSERT_FALSE(ShuffleVectorInst::isIdentityMask({0, -1, 2, 4}, 4)); } TEST(ShuffleVectorInst, isSelectMask) { - ASSERT_TRUE(ShuffleVectorInst::isSelectMask({0, 5, 6, 3})); + ASSERT_TRUE(ShuffleVectorInst::isSelectMask({0, 5, 6, 3}, 4)); - ASSERT_FALSE(ShuffleVectorInst::isSelectMask({0, 1, 2, 3})); + ASSERT_FALSE(ShuffleVectorInst::isSelectMask({0, 1, 2, 3}, 4)); } TEST(ShuffleVectorInst, isReverseMask) { - ASSERT_TRUE(ShuffleVectorInst::isReverseMask({3, 2, 1, 0})); - ASSERT_TRUE(ShuffleVectorInst::isReverseMask({-1, -1, 1, 0})); + ASSERT_TRUE(ShuffleVectorInst::isReverseMask({3, 2, 1, 0}, 4)); + ASSERT_TRUE(ShuffleVectorInst::isReverseMask({-1, -1, 1, 0}, 4)); - ASSERT_FALSE(ShuffleVectorInst::isReverseMask({4, 3, 2, 1})); + ASSERT_FALSE(ShuffleVectorInst::isReverseMask({4, 3, 2, 1}, 4)); } TEST(ShuffleVectorInst, isZeroEltSplatMask) { - ASSERT_TRUE(ShuffleVectorInst::isZeroEltSplatMask({0, 0, 0, 0})); - ASSERT_TRUE(ShuffleVectorInst::isZeroEltSplatMask({0, -1, 0, -1})); + ASSERT_TRUE(ShuffleVectorInst::isZeroEltSplatMask({0, 0, 0, 0}, 4)); + ASSERT_TRUE(ShuffleVectorInst::isZeroEltSplatMask({0, -1, 0, -1}, 4)); - ASSERT_FALSE(ShuffleVectorInst::isZeroEltSplatMask({1, 1, 1, 1})); + ASSERT_FALSE(ShuffleVectorInst::isZeroEltSplatMask({1, 1, 1, 1}, 4)); } TEST(ShuffleVectorInst, isTransposeMask) { - ASSERT_TRUE(ShuffleVectorInst::isTransposeMask({0, 4, 2, 6})); - ASSERT_TRUE(ShuffleVectorInst::isTransposeMask({1, 5, 3, 7})); + ASSERT_TRUE(ShuffleVectorInst::isTransposeMask({0, 4, 2, 6}, 4)); + ASSERT_TRUE(ShuffleVectorInst::isTransposeMask({1, 5, 3, 7}, 4)); - ASSERT_FALSE(ShuffleVectorInst::isTransposeMask({2, 6, 4, 8})); + ASSERT_FALSE(ShuffleVectorInst::isTransposeMask({2, 6, 4, 8}, 4)); } TEST(ShuffleVectorInst, isSpliceMask) { int Index; - ASSERT_TRUE(ShuffleVectorInst::isSpliceMask({0, 1, 2, 3}, Index)); + ASSERT_TRUE(ShuffleVectorInst::isSpliceMask({0, 1, 2, 3}, 4, Index)); ASSERT_EQ(0, Index); - ASSERT_TRUE(ShuffleVectorInst::isSpliceMask({1, 2, 3, 4, 5, 6, 7}, Index)); + ASSERT_TRUE(ShuffleVectorInst::isSpliceMask({1, 2, 3, 4, 5, 6, 7}, 7, Index)); ASSERT_EQ(1, Index); - ASSERT_FALSE(ShuffleVectorInst::isSpliceMask({4, 5, 6, 7}, Index)); + ASSERT_FALSE(ShuffleVectorInst::isSpliceMask({4, 5, 6, 7}, 4, Index)); } TEST(ShuffleVectorInst, isExtractSubvectorMask) {