diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2714,8 +2714,9 @@ /// Values used only by @llvm.assume calls. SmallPtrSet EphValues; - /// Holds all of the instructions that we gathered. - SetVector GatherShuffleSeq; + /// Holds all of the instructions that we gathered, shuffle instructions and + /// extractelements. + SetVector GatherShuffleExtractSeq; /// A list of blocks that we are going to CSE. SetVector CSEBlocks; @@ -7786,7 +7787,7 @@ auto *InsElt = dyn_cast(Vec); if (!InsElt) return Vec; - GatherShuffleSeq.insert(InsElt); + GatherShuffleExtractSeq.insert(InsElt); CSEBlocks.insert(InsElt->getParent()); // Add to our 'need-to-extract' list. if (TreeEntry *Entry = getTreeEntry(V)) { @@ -7940,7 +7941,7 @@ V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle"); } if (auto *I = dyn_cast(V)) { - GatherShuffleSeq.insert(I); + GatherShuffleExtractSeq.insert(I); CSEBlocks.insert(I->getParent()); } } @@ -8005,7 +8006,7 @@ VL = UniqueValues; } - ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq, + ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq, CSEBlocks); Value *Vec = gather(VL); if (!ReuseShuffleIndicies.empty()) { @@ -8025,7 +8026,7 @@ bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty(); unsigned VF = E->getVectorFactor(); - ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq, + ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq, CSEBlocks); if (E->State == TreeEntry::NeedToGather) { if (E->getMainOp()) @@ -8041,7 +8042,7 @@ Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue, Entries.back()->VectorizedValue, Mask); if (auto *I = dyn_cast(Vec)) { - GatherShuffleSeq.insert(I); + GatherShuffleExtractSeq.insert(I); CSEBlocks.insert(I->getParent()); } } else { @@ 
-8173,7 +8174,7 @@ if (!IsIdentity || NumElts != NumScalars) { V = Builder.CreateShuffleVector(V, Mask); if (auto *I = dyn_cast(V)) { - GatherShuffleSeq.insert(I); + GatherShuffleExtractSeq.insert(I); CSEBlocks.insert(I->getParent()); } } @@ -8191,7 +8192,7 @@ V = Builder.CreateShuffleVector( V, InsertMask, cast(E->Scalars.back())->getName()); if (auto *I = dyn_cast(V)) { - GatherShuffleSeq.insert(I); + GatherShuffleExtractSeq.insert(I); CSEBlocks.insert(I->getParent()); } // Create freeze for undef values. @@ -8209,7 +8210,7 @@ FirstInsert->getOperand(0), V, InsertMask, cast(E->Scalars.back())->getName()); if (auto *I = dyn_cast(V)) { - GatherShuffleSeq.insert(I); + GatherShuffleExtractSeq.insert(I); CSEBlocks.insert(I->getParent()); } } @@ -8587,7 +8588,7 @@ // instruction, if any. for (Value *V : {V0, V1}) { if (auto *I = dyn_cast(V)) { - GatherShuffleSeq.insert(I); + GatherShuffleExtractSeq.insert(I); CSEBlocks.insert(I->getParent()); } } @@ -8611,7 +8612,7 @@ Value *V = Builder.CreateShuffleVector(V0, V1, Mask); if (auto *I = dyn_cast(V)) { V = propagateMetadata(I, E->Scalars); - GatherShuffleSeq.insert(I); + GatherShuffleExtractSeq.insert(I); CSEBlocks.insert(I->getParent()); } V = ShuffleBuilder.finalize(V); @@ -8711,6 +8712,12 @@ } else { Ex = Builder.CreateExtractElement(Vec, Lane); } + // The then-branch of the previous if may produce a constant, since + // operand 0 might itself be a constant. + if (auto *ExI = dyn_cast(Ex)) { + GatherShuffleExtractSeq.insert(ExI); + CSEBlocks.insert(ExI->getParent()); + } // If necessary, sign-extend or zero-extend ScalarRoot // to the larger type. 
if (!MinBWs.count(ScalarRoot)) @@ -8740,7 +8747,6 @@ Builder.SetInsertPoint(&F->getEntryBlock().front()); } Value *NewInst = ExtractAndExtendIfNeeded(Vec); - CSEBlocks.insert(cast(Scalar)->getParent()); auto &NewInstLocs = ExternallyUsedValues[NewInst]; auto It = ExternallyUsedValues.find(Scalar); assert(It != ExternallyUsedValues.end() && @@ -8832,20 +8838,17 @@ Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator()); } Value *NewInst = ExtractAndExtendIfNeeded(Vec); - CSEBlocks.insert(PH->getIncomingBlock(i)); PH->setOperand(i, NewInst); } } } else { Builder.SetInsertPoint(cast(User)); Value *NewInst = ExtractAndExtendIfNeeded(Vec); - CSEBlocks.insert(cast(User)->getParent()); User->replaceUsesOfWith(Scalar, NewInst); } } else { Builder.SetInsertPoint(&F->getEntryBlock().front()); Value *NewInst = ExtractAndExtendIfNeeded(Vec); - CSEBlocks.insert(&F->getEntryBlock()); User->replaceUsesOfWith(Scalar, NewInst); } @@ -8959,7 +8962,7 @@ Op1, Op1 == Op2 ? PoisonValue::get(Op1->getType()) : Op2, CombinedMask1); if (auto *I = dyn_cast(Vec)) { - GatherShuffleSeq.insert(I); + GatherShuffleExtractSeq.insert(I); CSEBlocks.insert(I->getParent()); } return Vec; @@ -8974,7 +8977,7 @@ !IsIdentityMask(CombinedMask, cast(Op->getType()))) { Value *Vec = Builder.CreateShuffleVector(Op, CombinedMask); if (auto *I = dyn_cast(Vec)) { - GatherShuffleSeq.insert(I); + GatherShuffleExtractSeq.insert(I); CSEBlocks.insert(I->getParent()); } return Vec; @@ -9114,10 +9117,10 @@ } void BoUpSLP::optimizeGatherSequence() { - LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherShuffleSeq.size() + LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherShuffleExtractSeq.size() << " gather sequences instructions.\n"); // LICM InsertElementInst sequences. 
- for (Instruction *I : GatherShuffleSeq) { + for (Instruction *I : GatherShuffleExtractSeq) { if (isDeleted(I)) continue; @@ -9219,7 +9222,7 @@ if (isDeleted(&In)) continue; if (!isa(&In) && - !GatherShuffleSeq.contains(&In)) + !GatherShuffleExtractSeq.contains(&In)) continue; // Check if we can replace this instruction with any of the @@ -9238,7 +9241,7 @@ break; } if (isa(In) && isa(V) && - GatherShuffleSeq.contains(V) && + GatherShuffleExtractSeq.contains(V) && IsIdenticalOrLessDefined(V, &In, NewMask) && DT->dominates(In.getParent(), V->getParent())) { In.moveAfter(V); @@ -9259,7 +9262,7 @@ } } CSEBlocks.clear(); - GatherShuffleSeq.clear(); + GatherShuffleExtractSeq.clear(); } BoUpSLP::ScheduleData * diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cse_extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/cse_extractelement.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/cse_extractelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cse_extractelement.ll @@ -11,9 +11,9 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[S:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 ; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop1: -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 ; CHECK-NEXT: store i32 [[TMP3]], i32* [[S]], align 4 ; CHECK-NEXT: br i1 true, label [[LOOP1]], label [[CONT:%.*]] ; CHECK: cont: