Changeset View
Changeset View
Standalone View
Standalone View
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 540 Lines • ▼ Show 20 Lines | public: | ||||
/// Construct the vector value of a scalarized value \p V one lane at a time. | /// Construct the vector value of a scalarized value \p V one lane at a time. | ||||
void packScalarIntoVectorValue(Value *V, const VPIteration &Instance); | void packScalarIntoVectorValue(Value *V, const VPIteration &Instance); | ||||
/// Try to vectorize interleaved access group \p Group with the base address | /// Try to vectorize interleaved access group \p Group with the base address | ||||
/// given in \p Addr, optionally masking the vector operations if \p | /// given in \p Addr, optionally masking the vector operations if \p | ||||
/// BlockInMask is non-null. Use \p State to translate given VPValues to IR | /// BlockInMask is non-null. Use \p State to translate given VPValues to IR | ||||
/// values in the vectorized loop. | /// values in the vectorized loop. | ||||
void vectorizeInterleaveGroup(const InterleaveGroup<Instruction> *Group, | void vectorizeInterleaveGroup(const InterleaveGroup<Instruction> *Group, | ||||
ArrayRef<VPValue *> VPDefs, | |||||
VPTransformState &State, VPValue *Addr, | VPTransformState &State, VPValue *Addr, | ||||
VPValue *BlockInMask = nullptr); | VPValue *BlockInMask = nullptr); | ||||
/// Vectorize Load and Store instructions with the base address given in \p | /// Vectorize Load and Store instructions with the base address given in \p | ||||
/// Addr, optionally masking the vector operations if \p BlockInMask is | /// Addr, optionally masking the vector operations if \p BlockInMask is | ||||
/// non-null. Use \p State to translate given VPValues to IR values in the | /// non-null. Use \p State to translate given VPValues to IR values in the | ||||
/// vectorized loop. | /// vectorized loop. | ||||
void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State, | void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State, | ||||
▲ Show 20 Lines • Show All 1,756 Lines • ▼ Show 20 Lines | |||||
// } | // } | ||||
// To: | // To: | ||||
// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7> | // %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7> | ||||
// %B_U.vec = shuffle %B.vec, undef, <0, 1, 2, 3, u, u, u, u> | // %B_U.vec = shuffle %B.vec, undef, <0, 1, 2, 3, u, u, u, u> | ||||
// %interleaved.vec = shuffle %R_G.vec, %B_U.vec, | // %interleaved.vec = shuffle %R_G.vec, %B_U.vec, | ||||
// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements | // <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements | ||||
// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B | // store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B | ||||
void InnerLoopVectorizer::vectorizeInterleaveGroup( | void InnerLoopVectorizer::vectorizeInterleaveGroup( | ||||
const InterleaveGroup<Instruction> *Group, VPTransformState &State, | const InterleaveGroup<Instruction> *Group, ArrayRef<VPValue *> VPDefs, | ||||
VPValue *Addr, VPValue *BlockInMask) { | VPTransformState &State, VPValue *Addr, VPValue *BlockInMask) { | ||||
Instruction *Instr = Group->getInsertPos(); | Instruction *Instr = Group->getInsertPos(); | ||||
const DataLayout &DL = Instr->getModule()->getDataLayout(); | const DataLayout &DL = Instr->getModule()->getDataLayout(); | ||||
// Prepare for the vector type of the interleaved load/store. | // Prepare for the vector type of the interleaved load/store. | ||||
Type *ScalarTy = getMemInstValueType(Instr); | Type *ScalarTy = getMemInstValueType(Instr); | ||||
unsigned InterleaveFactor = Group->getFactor(); | unsigned InterleaveFactor = Group->getFactor(); | ||||
assert(!VF.isScalable() && "scalable vectors not yet supported."); | assert(!VF.isScalable() && "scalable vectors not yet supported."); | ||||
auto *VecTy = VectorType::get(ScalarTy, VF * InterleaveFactor); | auto *VecTy = VectorType::get(ScalarTy, VF * InterleaveFactor); | ||||
▲ Show 20 Lines • Show All 85 Lines • ▼ Show 20 Lines | for (unsigned Part = 0; Part < UF; Part++) { | ||||
NewLoad = Builder.CreateAlignedLoad(VecTy, AddrParts[Part], | NewLoad = Builder.CreateAlignedLoad(VecTy, AddrParts[Part], | ||||
Group->getAlign(), "wide.vec"); | Group->getAlign(), "wide.vec"); | ||||
Group->addMetadata(NewLoad); | Group->addMetadata(NewLoad); | ||||
NewLoads.push_back(NewLoad); | NewLoads.push_back(NewLoad); | ||||
} | } | ||||
// For each member in the group, shuffle out the appropriate data from the | // For each member in the group, shuffle out the appropriate data from the | ||||
// wide loads. | // wide loads. | ||||
unsigned J = 0; | |||||
for (unsigned I = 0; I < InterleaveFactor; ++I) { | for (unsigned I = 0; I < InterleaveFactor; ++I) { | ||||
Instruction *Member = Group->getMember(I); | Instruction *Member = Group->getMember(I); | ||||
// Skip the gaps in the group. | // Skip the gaps in the group. | ||||
if (!Member) | if (!Member) | ||||
continue; | continue; | ||||
assert(!VF.isScalable() && "scalable vectors not yet supported."); | assert(!VF.isScalable() && "scalable vectors not yet supported."); | ||||
auto StrideMask = | auto StrideMask = | ||||
createStrideMask(I, InterleaveFactor, VF.getKnownMinValue()); | createStrideMask(I, InterleaveFactor, VF.getKnownMinValue()); | ||||
for (unsigned Part = 0; Part < UF; Part++) { | for (unsigned Part = 0; Part < UF; Part++) { | ||||
Value *StridedVec = Builder.CreateShuffleVector( | Value *StridedVec = Builder.CreateShuffleVector( | ||||
NewLoads[Part], StrideMask, "strided.vec"); | NewLoads[Part], StrideMask, "strided.vec"); | ||||
// If this member has different type, cast the result type. | // If this member has different type, cast the result type. | ||||
if (Member->getType() != ScalarTy) { | if (Member->getType() != ScalarTy) { | ||||
assert(!VF.isScalable() && "VF is assumed to be non scalable."); | assert(!VF.isScalable() && "VF is assumed to be non scalable."); | ||||
VectorType *OtherVTy = VectorType::get(Member->getType(), VF); | VectorType *OtherVTy = VectorType::get(Member->getType(), VF); | ||||
StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL); | StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL); | ||||
} | } | ||||
if (Group->isReverse()) | if (Group->isReverse()) | ||||
StridedVec = reverseVector(StridedVec); | StridedVec = reverseVector(StridedVec); | ||||
VectorLoopValueMap.setVectorValue(Member, Part, StridedVec); | State.set(VPDefs[J], Member, StridedVec, Part); | ||||
} | } | ||||
++J; | |||||
} | } | ||||
return; | return; | ||||
} | } | ||||
// The sub vector type for current instruction. | // The sub vector type for current instruction. | ||||
assert(!VF.isScalable() && "VF is assumed to be non scalable."); | assert(!VF.isScalable() && "VF is assumed to be non scalable."); | ||||
auto *SubVT = VectorType::get(ScalarTy, VF); | auto *SubVT = VectorType::get(ScalarTy, VF); | ||||
▲ Show 20 Lines • Show All 4,820 Lines • ▼ Show 20 Lines | for (auto *Predecessor : predecessors(BB)) { | ||||
} | } | ||||
BlockMask = Builder.createOr(BlockMask, EdgeMask); | BlockMask = Builder.createOr(BlockMask, EdgeMask); | ||||
} | } | ||||
return BlockMaskCache[BB] = BlockMask; | return BlockMaskCache[BB] = BlockMask; | ||||
} | } | ||||
VPWidenMemoryInstructionRecipe * | VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, | ||||
VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, | |||||
VPlanPtr &Plan) { | VPlanPtr &Plan) { | ||||
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && | assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && | ||||
"Must be called with either a load or store"); | "Must be called with either a load or store"); | ||||
auto willWiden = [&](ElementCount VF) -> bool { | auto willWiden = [&](ElementCount VF) -> bool { | ||||
assert(!VF.isScalable() && "unexpected scalable ElementCount"); | assert(!VF.isScalable() && "unexpected scalable ElementCount"); | ||||
if (VF.isScalar()) | if (VF.isScalar()) | ||||
return false; | return false; | ||||
LoopVectorizationCostModel::InstWidening Decision = | LoopVectorizationCostModel::InstWidening Decision = | ||||
Show All 11 Lines | VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, | ||||
if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range)) | if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range)) | ||||
return nullptr; | return nullptr; | ||||
VPValue *Mask = nullptr; | VPValue *Mask = nullptr; | ||||
if (Legal->isMaskRequired(I)) | if (Legal->isMaskRequired(I)) | ||||
Mask = createBlockInMask(I->getParent(), Plan); | Mask = createBlockInMask(I->getParent(), Plan); | ||||
VPValue *Addr = Plan->getOrAddVPValue(getLoadStorePointerOperand(I)); | VPValue *Addr = Plan->getOrAddVPValue(getLoadStorePointerOperand(I)); | ||||
auto II = InsertPtToGroup.find(I); | |||||
if (II != InsertPtToGroup.end()) { | |||||
auto *IG = II->second; | |||||
auto *InterleaveG = new VPInterleaveRecipe(IG, Addr, Mask); | |||||
// If this is a load interleave group, add sub-values for each member. | |||||
dmgreen: Should this be surrounded by some sort of "if it's a load IG" check?
Equally, do we need to be… | |||||
Right, we only need to add VPValues for load groups, updated. I should probably also add a check that we do not add VPValues to a plan with underlying instructions/values of type void. fhahn: Right, we only need to add VPValues for load groups, updated. I should probably also add a… | |||||
if (isa<LoadInst>(I)) { | |||||
unsigned j = 0; | |||||
for (unsigned i = 0; i < IG->getFactor(); i++) | |||||
if (Instruction *Member = IG->getMember(i)) { | |||||
Plan->addVPValue(Member, InterleaveG->getResult(j)); | |||||
j++; | |||||
} | |||||
} | |||||
return InterleaveG; | |||||
} | |||||
if (LoadInst *Load = dyn_cast<LoadInst>(I)) | if (LoadInst *Load = dyn_cast<LoadInst>(I)) | ||||
return new VPWidenMemoryInstructionRecipe(*Load, Addr, Mask); | return new VPWidenMemoryInstructionRecipe(*Load, Addr, Mask); | ||||
StoreInst *Store = cast<StoreInst>(I); | StoreInst *Store = cast<StoreInst>(I); | ||||
VPValue *StoredValue = Plan->getOrAddVPValue(Store->getValueOperand()); | VPValue *StoredValue = Plan->getOrAddVPValue(Store->getValueOperand()); | ||||
return new VPWidenMemoryInstructionRecipe(*Store, Addr, StoredValue, Mask); | return new VPWidenMemoryInstructionRecipe(*Store, Addr, StoredValue, Mask); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 332 Lines • ▼ Show 20 Lines | VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( | ||||
const DenseMap<Instruction *, Instruction *> &SinkAfter) { | const DenseMap<Instruction *, Instruction *> &SinkAfter) { | ||||
// Hold a mapping from predicated instructions to their recipes, in order to | // Hold a mapping from predicated instructions to their recipes, in order to | ||||
// fix their AlsoPack behavior if a user is determined to replicate and use a | // fix their AlsoPack behavior if a user is determined to replicate and use a | ||||
// scalar instead of vector value. | // scalar instead of vector value. | ||||
DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe; | DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe; | ||||
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups; | SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups; | ||||
SmallPtrSet<Instruction *, 8> DeadInterleaveGroupMembers; | |||||
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, PSE, Builder); | VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, PSE, Builder); | ||||
// --------------------------------------------------------------------------- | // --------------------------------------------------------------------------- | ||||
// Pre-construction: record ingredients whose recipes we'll need to further | // Pre-construction: record ingredients whose recipes we'll need to further | ||||
// process after constructing the initial VPlan. | // process after constructing the initial VPlan. | ||||
// --------------------------------------------------------------------------- | // --------------------------------------------------------------------------- | ||||
Show All 29 Lines | for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) { | ||||
auto applyIG = [IG, this](ElementCount VF) -> bool { | auto applyIG = [IG, this](ElementCount VF) -> bool { | ||||
return (VF.isVector() && // Query is illegal for VF == 1 | return (VF.isVector() && // Query is illegal for VF == 1 | ||||
CM.getWideningDecision(IG->getInsertPos(), VF) == | CM.getWideningDecision(IG->getInsertPos(), VF) == | ||||
LoopVectorizationCostModel::CM_Interleave); | LoopVectorizationCostModel::CM_Interleave); | ||||
}; | }; | ||||
if (!getDecisionAndClampRange(applyIG, Range)) | if (!getDecisionAndClampRange(applyIG, Range)) | ||||
continue; | continue; | ||||
InterleaveGroups.insert(IG); | InterleaveGroups.insert(IG); | ||||
RecipeBuilder.recordInterleaveGroup(IG); | |||||
for (unsigned i = 0; i < IG->getFactor(); i++) | for (unsigned i = 0; i < IG->getFactor(); i++) | ||||
if (Instruction *Member = IG->getMember(i)) | if (Instruction *Member = IG->getMember(i)) { | ||||
RecipeBuilder.recordRecipeOf(Member); | RecipeBuilder.recordRecipeOf(Member); | ||||
if (Member != IG->getInsertPos()) | |||||
DeadInterleaveGroupMembers.insert(Member); | |||||
} | |||||
}; | }; | ||||
auto skipDeadInterleaveMembers = | |||||
[&DeadInterleaveGroupMembers](Instruction *I) { | |||||
BasicBlock *BB = I->getParent(); | |||||
for (auto &I : make_range(I->getIterator(), BB->end())) | |||||
if (!DeadInterleaveGroupMembers.contains(&I)) | |||||
return &I; | |||||
llvm_unreachable("Need to find a valid insert point"); | |||||
}; | |||||
// Mark instructions we'll need to sink later and their targets as | |||||
// ingredients whose recipe we'll need to record. | |||||
for (auto &Entry : SinkAfter) { | |||||
RecipeBuilder.recordRecipeOf(skipDeadInterleaveMembers(Entry.first)); | |||||
RecipeBuilder.recordRecipeOf(skipDeadInterleaveMembers(Entry.second)); | |||||
} | |||||
// --------------------------------------------------------------------------- | // --------------------------------------------------------------------------- | ||||
// Build initial VPlan: Scan the body of the loop in a topological order to | // Build initial VPlan: Scan the body of the loop in a topological order to | ||||
// visit each basic block after having visited its predecessor basic blocks. | // visit each basic block after having visited its predecessor basic blocks. | ||||
// --------------------------------------------------------------------------- | // --------------------------------------------------------------------------- | ||||
// Create a dummy pre-entry VPBasicBlock to start building the VPlan. | // Create a dummy pre-entry VPBasicBlock to start building the VPlan. | ||||
auto Plan = std::make_unique<VPlan>(); | auto Plan = std::make_unique<VPlan>(); | ||||
VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry"); | VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry"); | ||||
Show All 19 Lines | for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) { | ||||
// Introduce each ingredient into VPlan. | // Introduce each ingredient into VPlan. | ||||
// TODO: Model and preserve debug instrinsics in VPlan. | // TODO: Model and preserve debug instrinsics in VPlan. | ||||
for (Instruction &I : BB->instructionsWithoutDebug()) { | for (Instruction &I : BB->instructionsWithoutDebug()) { | ||||
Instruction *Instr = &I; | Instruction *Instr = &I; | ||||
// First filter out irrelevant instructions, to ensure no recipes are | // First filter out irrelevant instructions, to ensure no recipes are | ||||
// built for them. | // built for them. | ||||
if (isa<BranchInst>(Instr) || DeadInstructions.count(Instr)) | if (isa<BranchInst>(Instr) || DeadInstructions.count(Instr) || | ||||
DeadInterleaveGroupMembers.contains(Instr)) | |||||
continue; | continue; | ||||
if (auto Recipe = | if (auto Recipe = | ||||
RecipeBuilder.tryToCreateWidenRecipe(Instr, Range, Plan)) { | RecipeBuilder.tryToCreateWidenRecipe(Instr, Range, Plan)) { | ||||
// Check if the recipe can be converted to a VPValue. We need the extra | // Check if the recipe can be converted to a VPValue. We need the extra | ||||
// down-casting step until VPRecipeBase inherits from VPValue. | // down-casting step until VPRecipeBase inherits from VPValue. | ||||
VPValue *MaybeVPValue = Recipe->toVPValue(); | VPValue *MaybeVPValue = Recipe->toVPValue(); | ||||
if (!Instr->getType()->isVoidTy() && MaybeVPValue) { | if (!Instr->getType()->isVoidTy() && MaybeVPValue) { | ||||
Show All 31 Lines | VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( | ||||
// --------------------------------------------------------------------------- | // --------------------------------------------------------------------------- | ||||
// Transform initial VPlan: Apply previously taken decisions, in order, to | // Transform initial VPlan: Apply previously taken decisions, in order, to | ||||
// bring the VPlan to its final state. | // bring the VPlan to its final state. | ||||
// --------------------------------------------------------------------------- | // --------------------------------------------------------------------------- | ||||
// Apply Sink-After legal constraints. | // Apply Sink-After legal constraints. | ||||
for (auto &Entry : SinkAfter) { | for (auto &Entry : SinkAfter) { | ||||
VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first); | VPRecipeBase *Sink = | ||||
VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second); | RecipeBuilder.getRecipe(skipDeadInterleaveMembers(Entry.first)); | ||||
VPRecipeBase *Target = | |||||
RecipeBuilder.getRecipe(skipDeadInterleaveMembers(Entry.second)); | |||||
Sink->moveAfter(Target); | Sink->moveAfter(Target); | ||||
} | } | ||||
// Interleave memory: for each Interleave Group we marked earlier as relevant | |||||
// for this VPlan, replace the Recipes widening its memory instructions with a | |||||
// single VPInterleaveRecipe at its insertion point. | |||||
for (auto IG : InterleaveGroups) { | |||||
auto *Recipe = cast<VPWidenMemoryInstructionRecipe>( | |||||
RecipeBuilder.getRecipe(IG->getInsertPos())); | |||||
(new VPInterleaveRecipe(IG, Recipe->getAddr(), Recipe->getMask())) | |||||
->insertBefore(Recipe); | |||||
for (unsigned i = 0; i < IG->getFactor(); ++i) | |||||
if (Instruction *Member = IG->getMember(i)) { | |||||
VPValue *NewVPV = nullptr; | |||||
if (!Member->getType()->isVoidTy()) { | |||||
NewVPV = new VPValue(Member); | |||||
Plan->getVPValue(Member)->replaceAllUsesWith(NewVPV); | |||||
} | |||||
RecipeBuilder.getRecipe(Member)->eraseFromParent(); | |||||
if (NewVPV) | |||||
Plan->addVPValue(Member, NewVPV); | |||||
} | |||||
} | |||||
// Adjust the recipes for any inloop reductions. | // Adjust the recipes for any inloop reductions. | ||||
if (Range.Start > 1) | if (Range.Start > 1) | ||||
adjustRecipesForInLoopReductions(Plan, RecipeBuilder); | adjustRecipesForInLoopReductions(Plan, RecipeBuilder); | ||||
// Finally, if tail is folded by masking, introduce selects between the phi | // Finally, if tail is folded by masking, introduce selects between the phi | ||||
// and the live-out instruction of each reduction, at the end of the latch. | // and the live-out instruction of each reduction, at the end of the latch. | ||||
if (CM.foldTailByMasking() && !Legal->getReductionVars().empty()) { | if (CM.foldTailByMasking() && !Legal->getReductionVars().empty()) { | ||||
Builder.setInsertPoint(VPBB); | Builder.setInsertPoint(VPBB); | ||||
▲ Show 20 Lines • Show All 201 Lines • ▼ Show 20 Lines | for (unsigned In = 0; In < NumIncoming; ++In) { | ||||
} | } | ||||
} | } | ||||
for (unsigned Part = 0; Part < State.UF; ++Part) | for (unsigned Part = 0; Part < State.UF; ++Part) | ||||
State.ValueMap.setVectorValue(Phi, Part, Entry[Part]); | State.ValueMap.setVectorValue(Phi, Part, Entry[Part]); | ||||
} | } | ||||
void VPInterleaveRecipe::execute(VPTransformState &State) { | void VPInterleaveRecipe::execute(VPTransformState &State) { | ||||
assert(!State.Instance && "Interleave group being replicated."); | assert(!State.Instance && "Interleave group being replicated."); | ||||
State.ILV->vectorizeInterleaveGroup(IG, State, getAddr(), getMask()); | State.ILV->vectorizeInterleaveGroup(IG, getDefs(), State, getAddr(), | ||||
getMask()); | |||||
} | } | ||||
void VPReductionRecipe::execute(VPTransformState &State) { | void VPReductionRecipe::execute(VPTransformState &State) { | ||||
assert(!State.Instance && "Reduction being replicated."); | assert(!State.Instance && "Reduction being replicated."); | ||||
for (unsigned Part = 0; Part < State.UF; ++Part) { | for (unsigned Part = 0; Part < State.UF; ++Part) { | ||||
unsigned Kind = RdxDesc->getRecurrenceKind(); | unsigned Kind = RdxDesc->getRecurrenceKind(); | ||||
Value *NewVecOp = State.get(VecOp, Part); | Value *NewVecOp = State.get(VecOp, Part); | ||||
Value *NewRed = | Value *NewRed = | ||||
▲ Show 20 Lines • Show All 631 Lines • Show Last 20 Lines |
Should this be surrounded by some sort of "if it's a load IG" check?
Equally, do we need to be adding the operands for a store IG group?