diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4264,18 +4264,16 @@ if (E->isSame(VL)) { Value *V = vectorizeTree(E); if (VL.size() == E->Scalars.size() && !E->ReuseShuffleIndices.empty()) { - // We need to get the vectorized value but without shuffle. - if (auto *SV = dyn_cast(V)) { - V = SV->getOperand(0); - } else { - // Reshuffle to get only unique values. - SmallVector UniqueIdxs; - SmallSet UsedIdxs; - for (int Idx : E->ReuseShuffleIndices) - if (UsedIdxs.insert(Idx).second) - UniqueIdxs.emplace_back(Idx); - V = Builder.CreateShuffleVector(V, UniqueIdxs); + // Reshuffle to get only unique values. + SmallVector UniqueIdxs; + SmallSet UsedIdxs; + int Pos = 0; + for (int Idx : E->ReuseShuffleIndices) { + if (UsedIdxs.insert(Idx).second) + UniqueIdxs.emplace_back(Pos); + ++Pos; } + V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle"); } return V; } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll @@ -10,7 +10,8 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[PTR1:%.*]], i32 3 -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[TMP1]], +; CHECK-NEXT: [[SHRINK_SHUFFLE:%.*]] = shufflevector <4 x i32> [[SHUFFLE]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[SHRINK_SHUFFLE]], ; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 4 ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 5 @@ -65,7 +66,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[ENTRY:%.*]] ], [ [[TMP0]], [[IF_END:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[ENTRY:%.*]] ], [ [[SHRINK_SHUFFLE:%.*]], [[IF_END:%.*]] ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> poison, <8 x i32> ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: @@ -77,6 +78,7 @@ ; CHECK-NEXT: [[ARRAYIDX11_6:%.*]] = getelementptr inbounds i16, i16* undef, i32 6 ; CHECK-NEXT: [[ARRAYIDX11_7:%.*]] = getelementptr inbounds i16, i16* undef, i32 7 ; CHECK-NEXT: store <8 x i16> [[SHUFFLE]], <8 x i16>* undef, align 2 +; CHECK-NEXT: [[SHRINK_SHUFFLE]] = shufflevector <8 x i16> [[SHUFFLE]], <8 x i16> poison, <2 x i32> ; CHECK-NEXT: br label [[FOR_BODY]] ; entry: