diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -327,7 +327,11 @@ /// TargetTransformInfo::getInstructionThroughput? static Optional isFixedVectorShuffle(ArrayRef VL, SmallVectorImpl &Mask) { - auto *EI0 = cast(VL[0]); + const auto *It = + find_if(VL, [](Value *V) { return isa(V); }); + if (It == VL.end()) + return None; + auto *EI0 = cast(*It); if (isa(EI0->getVectorOperandType())) return None; unsigned Size = @@ -336,33 +340,41 @@ Value *Vec2 = nullptr; enum ShuffleMode { Unknown, Select, Permute }; ShuffleMode CommonShuffleMode = Unknown; + Mask.assign(VL.size(), UndefMaskElem); for (unsigned I = 0, E = VL.size(); I < E; ++I) { + // Undef can be represented as an undef element in a vector. + if (isa(VL[I])) + continue; auto *EI = cast(VL[I]); + if (isa(EI->getVectorOperandType())) + return None; auto *Vec = EI->getVectorOperand(); + // We can extractelement from undef or poison vector. + if (isa(Vec)) + continue; // All vector operands must have the same number of vector elements. if (cast(Vec->getType())->getNumElements() != Size) return None; + if (isa(EI->getIndexOperand())) + continue; auto *Idx = dyn_cast(EI->getIndexOperand()); if (!Idx) return None; // Undefined behavior if Idx is negative or >= Size. - if (Idx->getValue().uge(Size)) { - Mask.push_back(UndefMaskElem); + if (Idx->getValue().uge(Size)) continue; - } unsigned IntIdx = Idx->getValue().getZExtValue(); - Mask.push_back(IntIdx); - // We can extractelement from undef or poison vector. - if (isa(Vec)) - continue; + Mask[I] = IntIdx; // For correct shuffling we have to have at most 2 different vector operands // in all extractelement instructions. - if (!Vec1 || Vec1 == Vec) + if (!Vec1 || Vec1 == Vec) { Vec1 = Vec; - else if (!Vec2 || Vec2 == Vec) + } else if (!Vec2 || Vec2 == Vec) { Vec2 = Vec; - else + Mask[I] += Size; + } else { return None; + } if (CommonShuffleMode == Permute) continue; // If the extract index is not the same as the operation number, it is a @@ -4380,15 +4392,19 @@ bool IsGather) { DenseMap ExtractVectorsTys; for (auto *V : VL) { + if (isa(V)) + continue; // If all users of instruction are going to be vectorized and this // instruction itself is not going to be vectorized, consider this // instruction as dead and remove its cost from the final cost of the // vectorized tree. - if (!areAllUsersVectorized(cast(V), VectorizedVals) || - (IsGather && ScalarToTreeEntry.count(V))) + if (!areAllUsersVectorized(cast(V), VectorizedVals)) continue; auto *EE = cast(V); - unsigned Idx = *getExtractIndex(EE); + Optional EEIdx = getExtractIndex(EE); + if (!EEIdx) + continue; + unsigned Idx = *EEIdx; if (TTIRef.getNumberOfParts(VecTy) != TTIRef.getNumberOfParts(EE->getVectorOperandType())) { auto It = @@ -4420,6 +4436,8 @@ for (const auto &Data : ExtractVectorsTys) { auto *EEVTy = cast(Data.first->getType()); unsigned NumElts = VecTy->getNumElements(); + if (Data.second % NumElts == 0) + continue; if (TTIRef.getNumberOfParts(EEVTy) > TTIRef.getNumberOfParts(VecTy)) { unsigned Idx = (Data.second / NumElts) * NumElts; unsigned EENumElts = EEVTy->getNumElements(); @@ -4482,10 +4500,12 @@ // broadcast. return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy); } - if (E->getOpcode() == Instruction::ExtractElement && allSameType(VL) && - allSameBlock(VL) && - !isa( - cast(E->getMainOp())->getVectorOperandType())) { + if ((E->getOpcode() == Instruction::ExtractElement || + all_of(E->Scalars, + [](Value *V) { + return isa(V); + })) && + allSameType(VL)) { // Check that gather of extractelements can be represented as just a // shuffle of a single/two vectors the scalars are extracted from. SmallVector Mask; @@ -5054,7 +5074,11 @@ [this](Value *V) { return EphValues.contains(V); }) && (allConstant(TE->Scalars) || isSplat(TE->Scalars) || TE->Scalars.size() < Limit || - (TE->getOpcode() == Instruction::ExtractElement && + ((TE->getOpcode() == Instruction::ExtractElement || + all_of(TE->Scalars, + [](Value *V) { + return isa(V); + })) && isFixedVectorShuffle(TE->Scalars, Mask)) || (TE->State == TreeEntry::NeedToGather && TE->getOpcode() == Instruction::Load && !TE->isAltShuffle())); @@ -9127,8 +9151,9 @@ SmallVector BuildVectorOpds; SmallVector Mask; if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, BuildVectorInsts) || - (llvm::all_of(BuildVectorOpds, - [](Value *V) { return isa(V); }) && + (llvm::all_of( + BuildVectorOpds, + [](Value *V) { return isa(V); }) && isFixedVectorShuffle(BuildVectorOpds, Mask))) return false; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll @@ -230,25 +230,21 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-LABEL: @ashr_lshr_shl_v8i32( -; SSE-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 6 -; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7 -; SSE-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 6 -; SSE-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7 -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> ; SSE-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; SSE-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = lshr <8 x i32> [[A]], [[B]] ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <2 x i32> -; SSE-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]] -; SSE-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]] -; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> -; SSE-NEXT: [[R51:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> -; SSE-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R51]], i32 [[AB6]], i32 6 -; SSE-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7 -; SSE-NEXT: ret <8 x i32> [[R7]] +; SSE-NEXT: [[TMP8:%.*]] = shl <8 x i32> [[A]], [[B]] +; SSE-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <2 x i32> +; SSE-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> +; SSE-NEXT: [[R52:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <8 x i32> +; SSE-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <8 x i32> +; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[R52]], <8 x i32> [[TMP12]], <8 x i32> +; SSE-NEXT: ret <8 x i32> [[R71]] ; ; SLM-LABEL: @ashr_lshr_shl_v8i32( ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll @@ -230,25 +230,21 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-LABEL: @ashr_lshr_shl_v8i32( -; SSE-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 6 -; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7 -; SSE-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 6 -; SSE-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7 -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> undef, <4 x i32> ; SSE-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] ; SSE-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = lshr <8 x i32> [[A]], [[B]] ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <2 x i32> -; SSE-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]] -; SSE-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]] -; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> -; SSE-NEXT: [[R51:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> -; SSE-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R51]], i32 [[AB6]], i32 6 -; SSE-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7 -; SSE-NEXT: ret <8 x i32> [[R7]] +; SSE-NEXT: [[TMP8:%.*]] = shl <8 x i32> [[A]], [[B]] +; SSE-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <2 x i32> +; SSE-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> +; SSE-NEXT: [[R52:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <8 x i32> +; SSE-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <8 x i32> +; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[R52]], <8 x i32> [[TMP12]], <8 x i32> +; SSE-NEXT: ret <8 x i32> [[R71]] ; ; SLM-LABEL: @ashr_lshr_shl_v8i32( ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32>