Changeset View
Changeset View
Standalone View
Standalone View
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 2,708 Lines • ▼ Show 20 Lines | #endif | ||||
/// This list holds pairs of (Internal Scalar : External User). External User | /// This list holds pairs of (Internal Scalar : External User). External User | ||||
/// can be nullptr, it means that this Internal Scalar will be used later, | /// can be nullptr, it means that this Internal Scalar will be used later, | ||||
/// after vectorization. | /// after vectorization. | ||||
UserList ExternalUses; | UserList ExternalUses; | ||||
/// Values used only by @llvm.assume calls. | /// Values used only by @llvm.assume calls. | ||||
SmallPtrSet<const Value *, 32> EphValues; | SmallPtrSet<const Value *, 32> EphValues; | ||||
/// Holds all of the instructions that we gathered. | /// Holds all of the instructions that we gathered, shuffle instructions and | ||||
SetVector<Instruction *> GatherShuffleSeq; | /// extractelements. | ||||
SetVector<Instruction *> GatherShuffleExtractSeq; | |||||
/// A list of blocks that we are going to CSE. | /// A list of blocks that we are going to CSE. | ||||
SetVector<BasicBlock *> CSEBlocks; | SetVector<BasicBlock *> CSEBlocks; | ||||
/// Contains all scheduling relevant data for an instruction. | /// Contains all scheduling relevant data for an instruction. | ||||
/// A ScheduleData either represents a single instruction or a member of an | /// A ScheduleData either represents a single instruction or a member of an | ||||
/// instruction bundle (= a group of instructions which is combined into a | /// instruction bundle (= a group of instructions which is combined into a | ||||
/// vector instruction). | /// vector instruction). | ||||
▲ Show 20 Lines • Show All 5,054 Lines • ▼ Show 20 Lines | if (auto *Inst = dyn_cast<Instruction>(VL[I])) | ||||
PostponedInsts.emplace_back(Inst, I); | PostponedInsts.emplace_back(Inst, I); | ||||
} | } | ||||
auto &&CreateInsertElement = [this](Value *Vec, Value *V, unsigned Pos) { | auto &&CreateInsertElement = [this](Value *Vec, Value *V, unsigned Pos) { | ||||
Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(Pos)); | Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(Pos)); | ||||
auto *InsElt = dyn_cast<InsertElementInst>(Vec); | auto *InsElt = dyn_cast<InsertElementInst>(Vec); | ||||
if (!InsElt) | if (!InsElt) | ||||
return Vec; | return Vec; | ||||
GatherShuffleSeq.insert(InsElt); | GatherShuffleExtractSeq.insert(InsElt); | ||||
CSEBlocks.insert(InsElt->getParent()); | CSEBlocks.insert(InsElt->getParent()); | ||||
// Add to our 'need-to-extract' list. | // Add to our 'need-to-extract' list. | ||||
if (TreeEntry *Entry = getTreeEntry(V)) { | if (TreeEntry *Entry = getTreeEntry(V)) { | ||||
// Find which lane we need to extract. | // Find which lane we need to extract. | ||||
unsigned FoundLane = Entry->findLaneForValue(V); | unsigned FoundLane = Entry->findLaneForValue(V); | ||||
ExternalUses.emplace_back(V, InsElt, FoundLane); | ExternalUses.emplace_back(V, InsElt, FoundLane); | ||||
} | } | ||||
return Vec; | return Vec; | ||||
▲ Show 20 Lines • Show All 137 Lines • ▼ Show 20 Lines | if (TreeEntry *E = getTreeEntry(S.OpValue)) | ||||
assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() && | assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() && | ||||
"Expected vectorization factor less " | "Expected vectorization factor less " | ||||
"than original vector size."); | "than original vector size."); | ||||
SmallVector<int> UniformMask(VF, 0); | SmallVector<int> UniformMask(VF, 0); | ||||
std::iota(UniformMask.begin(), UniformMask.end(), 0); | std::iota(UniformMask.begin(), UniformMask.end(), 0); | ||||
V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle"); | V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle"); | ||||
} | } | ||||
if (auto *I = dyn_cast<Instruction>(V)) { | if (auto *I = dyn_cast<Instruction>(V)) { | ||||
GatherShuffleSeq.insert(I); | GatherShuffleExtractSeq.insert(I); | ||||
CSEBlocks.insert(I->getParent()); | CSEBlocks.insert(I->getParent()); | ||||
} | } | ||||
} | } | ||||
return V; | return V; | ||||
} | } | ||||
} | } | ||||
// Can't vectorize this, so simply build a new vector with each lane | // Can't vectorize this, so simply build a new vector with each lane | ||||
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | if (UniqueVals == 1 && UniqueValues.size() == 1) { | ||||
UniqueValues.clear(); | UniqueValues.clear(); | ||||
UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues)); | UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues)); | ||||
} | } | ||||
UniqueValues.append(VF - UniqueValues.size(), | UniqueValues.append(VF - UniqueValues.size(), | ||||
PoisonValue::get(VL[0]->getType())); | PoisonValue::get(VL[0]->getType())); | ||||
VL = UniqueValues; | VL = UniqueValues; | ||||
} | } | ||||
ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq, | ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq, | ||||
CSEBlocks); | CSEBlocks); | ||||
Value *Vec = gather(VL); | Value *Vec = gather(VL); | ||||
if (!ReuseShuffleIndicies.empty()) { | if (!ReuseShuffleIndicies.empty()) { | ||||
ShuffleBuilder.addMask(ReuseShuffleIndicies); | ShuffleBuilder.addMask(ReuseShuffleIndicies); | ||||
Vec = ShuffleBuilder.finalize(Vec); | Vec = ShuffleBuilder.finalize(Vec); | ||||
} | } | ||||
return Vec; | return Vec; | ||||
} | } | ||||
Value *BoUpSLP::vectorizeTree(TreeEntry *E) { | Value *BoUpSLP::vectorizeTree(TreeEntry *E) { | ||||
IRBuilder<>::InsertPointGuard Guard(Builder); | IRBuilder<>::InsertPointGuard Guard(Builder); | ||||
if (E->VectorizedValue) { | if (E->VectorizedValue) { | ||||
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n"); | LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n"); | ||||
return E->VectorizedValue; | return E->VectorizedValue; | ||||
} | } | ||||
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty(); | bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty(); | ||||
unsigned VF = E->getVectorFactor(); | unsigned VF = E->getVectorFactor(); | ||||
ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq, | ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq, | ||||
CSEBlocks); | CSEBlocks); | ||||
if (E->State == TreeEntry::NeedToGather) { | if (E->State == TreeEntry::NeedToGather) { | ||||
if (E->getMainOp()) | if (E->getMainOp()) | ||||
setInsertPointAfterBundle(E); | setInsertPointAfterBundle(E); | ||||
Value *Vec; | Value *Vec; | ||||
SmallVector<int> Mask; | SmallVector<int> Mask; | ||||
SmallVector<const TreeEntry *> Entries; | SmallVector<const TreeEntry *> Entries; | ||||
Optional<TargetTransformInfo::ShuffleKind> Shuffle = | Optional<TargetTransformInfo::ShuffleKind> Shuffle = | ||||
isGatherShuffledEntry(E, Mask, Entries); | isGatherShuffledEntry(E, Mask, Entries); | ||||
if (Shuffle) { | if (Shuffle) { | ||||
assert((Entries.size() == 1 || Entries.size() == 2) && | assert((Entries.size() == 1 || Entries.size() == 2) && | ||||
"Expected shuffle of 1 or 2 entries."); | "Expected shuffle of 1 or 2 entries."); | ||||
Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue, | Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue, | ||||
Entries.back()->VectorizedValue, Mask); | Entries.back()->VectorizedValue, Mask); | ||||
if (auto *I = dyn_cast<Instruction>(Vec)) { | if (auto *I = dyn_cast<Instruction>(Vec)) { | ||||
GatherShuffleSeq.insert(I); | GatherShuffleExtractSeq.insert(I); | ||||
CSEBlocks.insert(I->getParent()); | CSEBlocks.insert(I->getParent()); | ||||
} | } | ||||
} else { | } else { | ||||
Vec = gather(E->Scalars); | Vec = gather(E->Scalars); | ||||
} | } | ||||
if (NeedToShuffleReuses) { | if (NeedToShuffleReuses) { | ||||
ShuffleBuilder.addMask(E->ReuseShuffleIndices); | ShuffleBuilder.addMask(E->ReuseShuffleIndices); | ||||
Vec = ShuffleBuilder.finalize(Vec); | Vec = ShuffleBuilder.finalize(Vec); | ||||
▲ Show 20 Lines • Show All 115 Lines • ▼ Show 20 Lines | case Instruction::InsertElement: { | ||||
Value *Scalar = E->Scalars[PrevMask[I]]; | Value *Scalar = E->Scalars[PrevMask[I]]; | ||||
unsigned InsertIdx = *getInsertIndex(Scalar); | unsigned InsertIdx = *getInsertIndex(Scalar); | ||||
IsIdentity &= InsertIdx - Offset == I; | IsIdentity &= InsertIdx - Offset == I; | ||||
Mask[InsertIdx - Offset] = I; | Mask[InsertIdx - Offset] = I; | ||||
} | } | ||||
if (!IsIdentity || NumElts != NumScalars) { | if (!IsIdentity || NumElts != NumScalars) { | ||||
V = Builder.CreateShuffleVector(V, Mask); | V = Builder.CreateShuffleVector(V, Mask); | ||||
if (auto *I = dyn_cast<Instruction>(V)) { | if (auto *I = dyn_cast<Instruction>(V)) { | ||||
GatherShuffleSeq.insert(I); | GatherShuffleExtractSeq.insert(I); | ||||
CSEBlocks.insert(I->getParent()); | CSEBlocks.insert(I->getParent()); | ||||
} | } | ||||
} | } | ||||
SmallVector<int> InsertMask(NumElts, UndefMaskElem); | SmallVector<int> InsertMask(NumElts, UndefMaskElem); | ||||
for (unsigned I = 0; I < NumElts; I++) { | for (unsigned I = 0; I < NumElts; I++) { | ||||
if (Mask[I] != UndefMaskElem) | if (Mask[I] != UndefMaskElem) | ||||
InsertMask[Offset + I] = I; | InsertMask[Offset + I] = I; | ||||
} | } | ||||
bool IsFirstUndef = isUndefVector(FirstInsert->getOperand(0), InsertMask); | bool IsFirstUndef = isUndefVector(FirstInsert->getOperand(0), InsertMask); | ||||
if ((!IsIdentity || Offset != 0 || !IsFirstUndef) && | if ((!IsIdentity || Offset != 0 || !IsFirstUndef) && | ||||
NumElts != NumScalars) { | NumElts != NumScalars) { | ||||
if (IsFirstUndef) { | if (IsFirstUndef) { | ||||
if (!ShuffleVectorInst::isIdentityMask(InsertMask)) { | if (!ShuffleVectorInst::isIdentityMask(InsertMask)) { | ||||
V = Builder.CreateShuffleVector( | V = Builder.CreateShuffleVector( | ||||
V, InsertMask, cast<Instruction>(E->Scalars.back())->getName()); | V, InsertMask, cast<Instruction>(E->Scalars.back())->getName()); | ||||
if (auto *I = dyn_cast<Instruction>(V)) { | if (auto *I = dyn_cast<Instruction>(V)) { | ||||
GatherShuffleSeq.insert(I); | GatherShuffleExtractSeq.insert(I); | ||||
CSEBlocks.insert(I->getParent()); | CSEBlocks.insert(I->getParent()); | ||||
} | } | ||||
// Create freeze for undef values. | // Create freeze for undef values. | ||||
if (!isa<PoisonValue>(FirstInsert->getOperand(0))) | if (!isa<PoisonValue>(FirstInsert->getOperand(0))) | ||||
V = Builder.CreateFreeze(V); | V = Builder.CreateFreeze(V); | ||||
} | } | ||||
} else { | } else { | ||||
for (unsigned I = 0; I < NumElts; I++) { | for (unsigned I = 0; I < NumElts; I++) { | ||||
if (InsertMask[I] == UndefMaskElem) | if (InsertMask[I] == UndefMaskElem) | ||||
InsertMask[I] = I; | InsertMask[I] = I; | ||||
else | else | ||||
InsertMask[I] += NumElts; | InsertMask[I] += NumElts; | ||||
} | } | ||||
V = Builder.CreateShuffleVector( | V = Builder.CreateShuffleVector( | ||||
FirstInsert->getOperand(0), V, InsertMask, | FirstInsert->getOperand(0), V, InsertMask, | ||||
cast<Instruction>(E->Scalars.back())->getName()); | cast<Instruction>(E->Scalars.back())->getName()); | ||||
if (auto *I = dyn_cast<Instruction>(V)) { | if (auto *I = dyn_cast<Instruction>(V)) { | ||||
GatherShuffleSeq.insert(I); | GatherShuffleExtractSeq.insert(I); | ||||
CSEBlocks.insert(I->getParent()); | CSEBlocks.insert(I->getParent()); | ||||
} | } | ||||
} | } | ||||
} | } | ||||
++NumVectorInstructions; | ++NumVectorInstructions; | ||||
E->VectorizedValue = V; | E->VectorizedValue = V; | ||||
return V; | return V; | ||||
▲ Show 20 Lines • Show All 361 Lines • ▼ Show 20 Lines | case Instruction::ShuffleVector: { | ||||
static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy); | static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy); | ||||
V1 = Builder.CreateCast( | V1 = Builder.CreateCast( | ||||
static_cast<Instruction::CastOps>(E->getAltOpcode()), LHS, VecTy); | static_cast<Instruction::CastOps>(E->getAltOpcode()), LHS, VecTy); | ||||
} | } | ||||
// Add V0 and V1 to later analysis to try to find and remove matching | // Add V0 and V1 to later analysis to try to find and remove matching | ||||
// instruction, if any. | // instruction, if any. | ||||
for (Value *V : {V0, V1}) { | for (Value *V : {V0, V1}) { | ||||
if (auto *I = dyn_cast<Instruction>(V)) { | if (auto *I = dyn_cast<Instruction>(V)) { | ||||
GatherShuffleSeq.insert(I); | GatherShuffleExtractSeq.insert(I); | ||||
CSEBlocks.insert(I->getParent()); | CSEBlocks.insert(I->getParent()); | ||||
} | } | ||||
} | } | ||||
// Create shuffle to take alternate operations from the vector. | // Create shuffle to take alternate operations from the vector. | ||||
// Also, gather up main and alt scalar ops to propagate IR flags to | // Also, gather up main and alt scalar ops to propagate IR flags to | ||||
// each vector operation. | // each vector operation. | ||||
ValueList OpScalars, AltScalars; | ValueList OpScalars, AltScalars; | ||||
SmallVector<int> Mask; | SmallVector<int> Mask; | ||||
buildShuffleEntryMask( | buildShuffleEntryMask( | ||||
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices, | E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices, | ||||
[E](Instruction *I) { | [E](Instruction *I) { | ||||
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode"); | assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode"); | ||||
return isAlternateInstruction(I, E->getMainOp(), E->getAltOp()); | return isAlternateInstruction(I, E->getMainOp(), E->getAltOp()); | ||||
}, | }, | ||||
Mask, &OpScalars, &AltScalars); | Mask, &OpScalars, &AltScalars); | ||||
propagateIRFlags(V0, OpScalars); | propagateIRFlags(V0, OpScalars); | ||||
propagateIRFlags(V1, AltScalars); | propagateIRFlags(V1, AltScalars); | ||||
Value *V = Builder.CreateShuffleVector(V0, V1, Mask); | Value *V = Builder.CreateShuffleVector(V0, V1, Mask); | ||||
if (auto *I = dyn_cast<Instruction>(V)) { | if (auto *I = dyn_cast<Instruction>(V)) { | ||||
V = propagateMetadata(I, E->Scalars); | V = propagateMetadata(I, E->Scalars); | ||||
GatherShuffleSeq.insert(I); | GatherShuffleExtractSeq.insert(I); | ||||
CSEBlocks.insert(I->getParent()); | CSEBlocks.insert(I->getParent()); | ||||
} | } | ||||
V = ShuffleBuilder.finalize(V); | V = ShuffleBuilder.finalize(V); | ||||
E->VectorizedValue = V; | E->VectorizedValue = V; | ||||
++NumVectorInstructions; | ++NumVectorInstructions; | ||||
return V; | return V; | ||||
▲ Show 20 Lines • Show All 83 Lines • ▼ Show 20 Lines | auto ExtractAndExtendIfNeeded = [&](Value *Vec) { | ||||
Value *Ex; | Value *Ex; | ||||
// "Reuse" the existing extract to improve final codegen. | // "Reuse" the existing extract to improve final codegen. | ||||
if (auto *ES = dyn_cast<ExtractElementInst>(Scalar)) { | if (auto *ES = dyn_cast<ExtractElementInst>(Scalar)) { | ||||
Ex = Builder.CreateExtractElement(ES->getOperand(0), | Ex = Builder.CreateExtractElement(ES->getOperand(0), | ||||
ES->getOperand(1)); | ES->getOperand(1)); | ||||
} else { | } else { | ||||
Ex = Builder.CreateExtractElement(Vec, Lane); | Ex = Builder.CreateExtractElement(Vec, Lane); | ||||
} | } | ||||
// The then branch of the previous if may produce constants, since 0 | |||||
vdmitrie: At this point Ex is guaranteed to be an instruction. So why dyn_cast? | |||||
Just want to be safe, remember some conflicts with possible ConstExprs as operands. I can change it to cast ABataev: Just want to be safe, remember some conflicts with possible ConstExprs as operands. I can… | |||||
Not Done ReplyInline ActionsI don't think such "safety" measure has good justification. If for some reason the cast fires an assertion it will be easy to understand what happened. Otherwise you would allow the issue to go further which sometimes might result in hard to trace performance issues. vdmitrie: I don't think such "safety" measure has good justification. If for some reason the cast fires… | |||||
// operand might be a constant. | |||||
if (auto *ExI = dyn_cast<Instruction>(Ex)) { | |||||
GatherShuffleExtractSeq.insert(ExI); | |||||
CSEBlocks.insert(ExI->getParent()); | |||||
} | |||||
// If necessary, sign-extend or zero-extend ScalarRoot | // If necessary, sign-extend or zero-extend ScalarRoot | ||||
// to the larger type. | // to the larger type. | ||||
if (!MinBWs.count(ScalarRoot)) | if (!MinBWs.count(ScalarRoot)) | ||||
return Ex; | return Ex; | ||||
if (MinBWs[ScalarRoot].second) | if (MinBWs[ScalarRoot].second) | ||||
return Builder.CreateSExt(Ex, Scalar->getType()); | return Builder.CreateSExt(Ex, Scalar->getType()); | ||||
return Builder.CreateZExt(Ex, Scalar->getType()); | return Builder.CreateZExt(Ex, Scalar->getType()); | ||||
} | } | ||||
Show All 12 Lines | if (!User) { | ||||
"Scalar with nullptr as an external user must be registered in " | "Scalar with nullptr as an external user must be registered in " | ||||
"ExternallyUsedValues map"); | "ExternallyUsedValues map"); | ||||
if (auto *VecI = dyn_cast<Instruction>(Vec)) { | if (auto *VecI = dyn_cast<Instruction>(Vec)) { | ||||
Builder.SetInsertPoint(VecI->getParent(), | Builder.SetInsertPoint(VecI->getParent(), | ||||
std::next(VecI->getIterator())); | std::next(VecI->getIterator())); | ||||
} else { | } else { | ||||
Builder.SetInsertPoint(&F->getEntryBlock().front()); | Builder.SetInsertPoint(&F->getEntryBlock().front()); | ||||
} | } | ||||
Value *NewInst = ExtractAndExtendIfNeeded(Vec); | Value *NewInst = ExtractAndExtendIfNeeded(Vec); | ||||
CSEBlocks.insert(cast<Instruction>(Scalar)->getParent()); | |||||
auto &NewInstLocs = ExternallyUsedValues[NewInst]; | auto &NewInstLocs = ExternallyUsedValues[NewInst]; | ||||
Not Done ReplyInline Actionssame seems applies here. vdmitrie: same seems applies here. | |||||
auto It = ExternallyUsedValues.find(Scalar); | auto It = ExternallyUsedValues.find(Scalar); | ||||
assert(It != ExternallyUsedValues.end() && | assert(It != ExternallyUsedValues.end() && | ||||
"Externally used scalar is not found in ExternallyUsedValues"); | "Externally used scalar is not found in ExternallyUsedValues"); | ||||
NewInstLocs.append(It->second); | NewInstLocs.append(It->second); | ||||
ExternallyUsedValues.erase(Scalar); | ExternallyUsedValues.erase(Scalar); | ||||
// Required to update internally referenced instructions. | // Required to update internally referenced instructions. | ||||
Scalar->replaceAllUsesWith(NewInst); | Scalar->replaceAllUsesWith(NewInst); | ||||
continue; | continue; | ||||
▲ Show 20 Lines • Show All 73 Lines • ▼ Show 20 Lines | if (auto *VecI = dyn_cast<Instruction>(Vec)) { | ||||
Instruction *IncomingTerminator = | Instruction *IncomingTerminator = | ||||
PH->getIncomingBlock(i)->getTerminator(); | PH->getIncomingBlock(i)->getTerminator(); | ||||
if (isa<CatchSwitchInst>(IncomingTerminator)) { | if (isa<CatchSwitchInst>(IncomingTerminator)) { | ||||
Builder.SetInsertPoint(VecI->getParent(), | Builder.SetInsertPoint(VecI->getParent(), | ||||
std::next(VecI->getIterator())); | std::next(VecI->getIterator())); | ||||
} else { | } else { | ||||
Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator()); | Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator()); | ||||
} | } | ||||
Value *NewInst = ExtractAndExtendIfNeeded(Vec); | Value *NewInst = ExtractAndExtendIfNeeded(Vec); | ||||
CSEBlocks.insert(PH->getIncomingBlock(i)); | |||||
PH->setOperand(i, NewInst); | PH->setOperand(i, NewInst); | ||||
Not Done ReplyInline Actionsand here vdmitrie: and here | |||||
} | } | ||||
} | } | ||||
} else { | } else { | ||||
Builder.SetInsertPoint(cast<Instruction>(User)); | Builder.SetInsertPoint(cast<Instruction>(User)); | ||||
Value *NewInst = ExtractAndExtendIfNeeded(Vec); | Value *NewInst = ExtractAndExtendIfNeeded(Vec); | ||||
CSEBlocks.insert(cast<Instruction>(User)->getParent()); | |||||
User->replaceUsesOfWith(Scalar, NewInst); | User->replaceUsesOfWith(Scalar, NewInst); | ||||
} | } | ||||
} else { | } else { | ||||
Builder.SetInsertPoint(&F->getEntryBlock().front()); | Builder.SetInsertPoint(&F->getEntryBlock().front()); | ||||
Value *NewInst = ExtractAndExtendIfNeeded(Vec); | Value *NewInst = ExtractAndExtendIfNeeded(Vec); | ||||
CSEBlocks.insert(&F->getEntryBlock()); | |||||
User->replaceUsesOfWith(Scalar, NewInst); | User->replaceUsesOfWith(Scalar, NewInst); | ||||
} | } | ||||
LLVM_DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n"); | LLVM_DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n"); | ||||
} | } | ||||
// Checks if the mask is an identity mask. | // Checks if the mask is an identity mask. | ||||
auto &&IsIdentityMask = [](ArrayRef<int> Mask, FixedVectorType *VecTy) { | auto &&IsIdentityMask = [](ArrayRef<int> Mask, FixedVectorType *VecTy) { | ||||
▲ Show 20 Lines • Show All 97 Lines • ▼ Show 20 Lines | if (V2 && !isUndefVector(V2, Mask)) { | ||||
"Expected undefined mask element"); | "Expected undefined mask element"); | ||||
CombinedMask1[I] = CombinedMask2[I] + (Op1 == Op2 ? 0 : VF); | CombinedMask1[I] = CombinedMask2[I] + (Op1 == Op2 ? 0 : VF); | ||||
} | } | ||||
} | } | ||||
Value *Vec = Builder.CreateShuffleVector( | Value *Vec = Builder.CreateShuffleVector( | ||||
Op1, Op1 == Op2 ? PoisonValue::get(Op1->getType()) : Op2, | Op1, Op1 == Op2 ? PoisonValue::get(Op1->getType()) : Op2, | ||||
CombinedMask1); | CombinedMask1); | ||||
if (auto *I = dyn_cast<Instruction>(Vec)) { | if (auto *I = dyn_cast<Instruction>(Vec)) { | ||||
GatherShuffleSeq.insert(I); | GatherShuffleExtractSeq.insert(I); | ||||
CSEBlocks.insert(I->getParent()); | CSEBlocks.insert(I->getParent()); | ||||
} | } | ||||
return Vec; | return Vec; | ||||
} | } | ||||
if (isa<PoisonValue>(V1)) | if (isa<PoisonValue>(V1)) | ||||
return PoisonValue::get(FixedVectorType::get( | return PoisonValue::get(FixedVectorType::get( | ||||
cast<VectorType>(V1->getType())->getElementType(), Mask.size())); | cast<VectorType>(V1->getType())->getElementType(), Mask.size())); | ||||
Value *Op = V1; | Value *Op = V1; | ||||
SmallVector<int> CombinedMask(Mask); | SmallVector<int> CombinedMask(Mask); | ||||
PeekThroughShuffles(Op, CombinedMask); | PeekThroughShuffles(Op, CombinedMask); | ||||
if (!isa<FixedVectorType>(Op->getType()) || | if (!isa<FixedVectorType>(Op->getType()) || | ||||
!IsIdentityMask(CombinedMask, cast<FixedVectorType>(Op->getType()))) { | !IsIdentityMask(CombinedMask, cast<FixedVectorType>(Op->getType()))) { | ||||
Value *Vec = Builder.CreateShuffleVector(Op, CombinedMask); | Value *Vec = Builder.CreateShuffleVector(Op, CombinedMask); | ||||
if (auto *I = dyn_cast<Instruction>(Vec)) { | if (auto *I = dyn_cast<Instruction>(Vec)) { | ||||
GatherShuffleSeq.insert(I); | GatherShuffleExtractSeq.insert(I); | ||||
CSEBlocks.insert(I->getParent()); | CSEBlocks.insert(I->getParent()); | ||||
} | } | ||||
return Vec; | return Vec; | ||||
} | } | ||||
return Op; | return Op; | ||||
}; | }; | ||||
auto &&ResizeToVF = [&CreateShuffle](Value *Vec, ArrayRef<int> Mask, | auto &&ResizeToVF = [&CreateShuffle](Value *Vec, ArrayRef<int> Mask, | ||||
▲ Show 20 Lines • Show All 123 Lines • ▼ Show 20 Lines | #endif | ||||
Builder.ClearInsertionPoint(); | Builder.ClearInsertionPoint(); | ||||
InstrElementSize.clear(); | InstrElementSize.clear(); | ||||
return VectorizableTree[0]->VectorizedValue; | return VectorizableTree[0]->VectorizedValue; | ||||
} | } | ||||
void BoUpSLP::optimizeGatherSequence() { | void BoUpSLP::optimizeGatherSequence() { | ||||
LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherShuffleSeq.size() | LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherShuffleExtractSeq.size() | ||||
<< " gather sequences instructions.\n"); | << " gather sequences instructions.\n"); | ||||
// LICM InsertElementInst sequences. | // LICM InsertElementInst sequences. | ||||
for (Instruction *I : GatherShuffleSeq) { | for (Instruction *I : GatherShuffleExtractSeq) { | ||||
if (isDeleted(I)) | if (isDeleted(I)) | ||||
continue; | continue; | ||||
// Check if this block is inside a loop. | // Check if this block is inside a loop. | ||||
Loop *L = LI->getLoopFor(I->getParent()); | Loop *L = LI->getLoopFor(I->getParent()); | ||||
if (!L) | if (!L) | ||||
continue; | continue; | ||||
▲ Show 20 Lines • Show All 85 Lines • ▼ Show 20 Lines | assert(*I && | ||||
(I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) && | (I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) && | ||||
"Worklist not sorted properly!"); | "Worklist not sorted properly!"); | ||||
BasicBlock *BB = (*I)->getBlock(); | BasicBlock *BB = (*I)->getBlock(); | ||||
// For all instructions in blocks containing gather sequences: | // For all instructions in blocks containing gather sequences: | ||||
for (Instruction &In : llvm::make_early_inc_range(*BB)) { | for (Instruction &In : llvm::make_early_inc_range(*BB)) { | ||||
if (isDeleted(&In)) | if (isDeleted(&In)) | ||||
continue; | continue; | ||||
if (!isa<InsertElementInst, ExtractElementInst, ShuffleVectorInst>(&In) && | if (!isa<InsertElementInst, ExtractElementInst, ShuffleVectorInst>(&In) && | ||||
!GatherShuffleSeq.contains(&In)) | !GatherShuffleExtractSeq.contains(&In)) | ||||
continue; | continue; | ||||
// Check if we can replace this instruction with any of the | // Check if we can replace this instruction with any of the | ||||
// visited instructions. | // visited instructions. | ||||
bool Replaced = false; | bool Replaced = false; | ||||
for (Instruction *&V : Visited) { | for (Instruction *&V : Visited) { | ||||
SmallVector<int> NewMask; | SmallVector<int> NewMask; | ||||
if (IsIdenticalOrLessDefined(&In, V, NewMask) && | if (IsIdenticalOrLessDefined(&In, V, NewMask) && | ||||
DT->dominates(V->getParent(), In.getParent())) { | DT->dominates(V->getParent(), In.getParent())) { | ||||
In.replaceAllUsesWith(V); | In.replaceAllUsesWith(V); | ||||
eraseInstruction(&In); | eraseInstruction(&In); | ||||
if (auto *SI = dyn_cast<ShuffleVectorInst>(V)) | if (auto *SI = dyn_cast<ShuffleVectorInst>(V)) | ||||
if (!NewMask.empty()) | if (!NewMask.empty()) | ||||
SI->setShuffleMask(NewMask); | SI->setShuffleMask(NewMask); | ||||
Replaced = true; | Replaced = true; | ||||
break; | break; | ||||
} | } | ||||
if (isa<ShuffleVectorInst>(In) && isa<ShuffleVectorInst>(V) && | if (isa<ShuffleVectorInst>(In) && isa<ShuffleVectorInst>(V) && | ||||
GatherShuffleSeq.contains(V) && | GatherShuffleExtractSeq.contains(V) && | ||||
IsIdenticalOrLessDefined(V, &In, NewMask) && | IsIdenticalOrLessDefined(V, &In, NewMask) && | ||||
DT->dominates(In.getParent(), V->getParent())) { | DT->dominates(In.getParent(), V->getParent())) { | ||||
In.moveAfter(V); | In.moveAfter(V); | ||||
V->replaceAllUsesWith(&In); | V->replaceAllUsesWith(&In); | ||||
eraseInstruction(V); | eraseInstruction(V); | ||||
if (auto *SI = dyn_cast<ShuffleVectorInst>(&In)) | if (auto *SI = dyn_cast<ShuffleVectorInst>(&In)) | ||||
if (!NewMask.empty()) | if (!NewMask.empty()) | ||||
SI->setShuffleMask(NewMask); | SI->setShuffleMask(NewMask); | ||||
V = &In; | V = &In; | ||||
Replaced = true; | Replaced = true; | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
if (!Replaced) { | if (!Replaced) { | ||||
assert(!is_contained(Visited, &In)); | assert(!is_contained(Visited, &In)); | ||||
Visited.push_back(&In); | Visited.push_back(&In); | ||||
} | } | ||||
} | } | ||||
} | } | ||||
CSEBlocks.clear(); | CSEBlocks.clear(); | ||||
GatherShuffleSeq.clear(); | GatherShuffleExtractSeq.clear(); | ||||
} | } | ||||
BoUpSLP::ScheduleData * | BoUpSLP::ScheduleData * | ||||
BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL) { | BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL) { | ||||
ScheduleData *Bundle = nullptr; | ScheduleData *Bundle = nullptr; | ||||
ScheduleData *PrevInBundle = nullptr; | ScheduleData *PrevInBundle = nullptr; | ||||
for (Value *V : VL) { | for (Value *V : VL) { | ||||
if (doesNotNeedToBeScheduled(V)) | if (doesNotNeedToBeScheduled(V)) | ||||
▲ Show 20 Lines • Show All 3,463 Lines • Show Last 20 Lines |
At this point Ex is guaranteed to be an instruction. So why dyn_cast?