Changeset View
Changeset View
Standalone View
Standalone View
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 593 Lines • ▼ Show 20 Lines | protected: | ||||
void fixCrossIterationPHIs(VPTransformState &State); | void fixCrossIterationPHIs(VPTransformState &State); | ||||
/// Fix a first-order recurrence. This is the second phase of vectorizing | /// Fix a first-order recurrence. This is the second phase of vectorizing | ||||
/// this phi node. | /// this phi node. | ||||
void fixFirstOrderRecurrence(PHINode *Phi, VPTransformState &State); | void fixFirstOrderRecurrence(PHINode *Phi, VPTransformState &State); | ||||
/// Fix a reduction cross-iteration phi. This is the second phase of | /// Fix a reduction cross-iteration phi. This is the second phase of | ||||
/// vectorizing this phi node. | /// vectorizing this phi node. | ||||
void fixReduction(PHINode *Phi, VPTransformState &State); | void fixReduction(VPWidenPHIRecipe *Phi, VPTransformState &State); | ||||
/// Clear NSW/NUW flags from reduction instructions if necessary. | /// Clear NSW/NUW flags from reduction instructions if necessary. | ||||
void clearReductionWrapFlags(RecurrenceDescriptor &RdxDesc, | void clearReductionWrapFlags(RecurrenceDescriptor &RdxDesc, | ||||
VPTransformState &State); | VPTransformState &State); | ||||
/// Fixup the LCSSA phi nodes in the unique exit block. This simply | /// Fixup the LCSSA phi nodes in the unique exit block. This simply | ||||
/// means we need to add the appropriate incoming value from the middle | /// means we need to add the appropriate incoming value from the middle | ||||
/// block as exiting edges from the scalar epilogue loop (if present) are | /// block as exiting edges from the scalar epilogue loop (if present) are | ||||
▲ Show 20 Lines • Show All 3,449 Lines • ▼ Show 20 Lines | |||||
void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) { | void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) { | ||||
// In order to support recurrences we need to be able to vectorize Phi nodes. | // In order to support recurrences we need to be able to vectorize Phi nodes. | ||||
// Phi nodes have cycles, so we need to vectorize them in two stages. This is | // Phi nodes have cycles, so we need to vectorize them in two stages. This is | ||||
// stage #2: We now need to fix the recurrences by adding incoming edges to | // stage #2: We now need to fix the recurrences by adding incoming edges to | ||||
// the currently empty PHI nodes. At this point every instruction in the | // the currently empty PHI nodes. At this point every instruction in the | ||||
// original loop is widened to a vector form so we can use them to construct | // original loop is widened to a vector form so we can use them to construct | ||||
// the incoming edges. | // the incoming edges. | ||||
for (PHINode &Phi : OrigLoop->getHeader()->phis()) { | VPBasicBlock *Header = State.Plan->getEntry()->getEntryBasicBlock(); | ||||
// Handle first-order recurrences and reductions that need to be fixed. | for (VPRecipeBase &R : Header->phis()) { | ||||
Ayal: iterate over Header->phis()? | |||||
This should be updated now I think. fhahn: This should be updated now I think. | |||||
if (Legal->isFirstOrderRecurrence(&Phi)) | auto *PhiR = dyn_cast<VPWidenPHIRecipe>(&R); | ||||
fixFirstOrderRecurrence(&Phi, State); | if (!PhiR) | ||||
else if (Legal->isReductionVariable(&Phi)) | continue; | ||||
fixReduction(&Phi, State); | auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue()); | ||||
if (PhiR->getRecurrenceDescriptor()) { | |||||
fixReduction(PhiR, State); | |||||
} else if (Legal->isFirstOrderRecurrence(OrigPhi)) | |||||
fixFirstOrderRecurrence(OrigPhi, State); | |||||
} | } | ||||
} | } | ||||
void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi, | void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi, | ||||
VPTransformState &State) { | VPTransformState &State) { | ||||
// This is the second phase of vectorizing first-order recurrences. An | // This is the second phase of vectorizing first-order recurrences. An | ||||
// overview of the transformation is described below. Suppose we have the | // overview of the transformation is described below. Suppose we have the | ||||
// following loop. | // following loop. | ||||
▲ Show 20 Lines • Show All 178 Lines • ▼ Show 20 Lines | if (any_of(LCSSAPhi.incoming_values(), | ||||
[Phi](Value *V) { return V == Phi; })) | [Phi](Value *V) { return V == Phi; })) | ||||
LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock); | LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock); | ||||
} | } | ||||
static bool useOrderedReductions(RecurrenceDescriptor &RdxDesc) { | static bool useOrderedReductions(RecurrenceDescriptor &RdxDesc) { | ||||
return EnableStrictReductions && RdxDesc.isOrdered(); | return EnableStrictReductions && RdxDesc.isOrdered(); | ||||
} | } | ||||
void InnerLoopVectorizer::fixReduction(PHINode *Phi, VPTransformState &State) { | void InnerLoopVectorizer::fixReduction(VPWidenPHIRecipe *PhiR, | ||||
VPTransformState &State) { | |||||
PHINode *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue()); | |||||
// Get it's reduction variable descriptor. | // Get it's reduction variable descriptor. | ||||
assert(Legal->isReductionVariable(Phi) && | assert(Legal->isReductionVariable(OrigPhi) && | ||||
"Unable to find the reduction variable"); | "Unable to find the reduction variable"); | ||||
RecurrenceDescriptor RdxDesc = Legal->getReductionVars()[Phi]; | RecurrenceDescriptor RdxDesc = *PhiR->getRecurrenceDescriptor(); | ||||
RecurKind RK = RdxDesc.getRecurrenceKind(); | RecurKind RK = RdxDesc.getRecurrenceKind(); | ||||
TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue(); | TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue(); | ||||
Instruction *LoopExitInst = RdxDesc.getLoopExitInstr(); | Instruction *LoopExitInst = RdxDesc.getLoopExitInstr(); | ||||
setDebugLocFromInst(Builder, ReductionStartValue); | setDebugLocFromInst(Builder, ReductionStartValue); | ||||
bool IsInLoopReductionPhi = Cost->isInLoopReduction(Phi); | bool IsInLoopReductionPhi = Cost->isInLoopReduction(OrigPhi); | ||||
VPValue *LoopExitInstDef = State.Plan->getVPValue(LoopExitInst); | VPValue *LoopExitInstDef = State.Plan->getVPValue(LoopExitInst); | ||||
// This is the vector-clone of the value that leaves the loop. | // This is the vector-clone of the value that leaves the loop. | ||||
Type *VecTy = State.get(LoopExitInstDef, 0)->getType(); | Type *VecTy = State.get(LoopExitInstDef, 0)->getType(); | ||||
// Wrap flags are in general invalid after vectorization, clear them. | // Wrap flags are in general invalid after vectorization, clear them. | ||||
clearReductionWrapFlags(RdxDesc, State); | clearReductionWrapFlags(RdxDesc, State); | ||||
// Fix the vector-loop phi. | // Fix the vector-loop phi. | ||||
// Reductions do not have to start at zero. They can start with | // Reductions do not have to start at zero. They can start with | ||||
// any loop invariant values. | // any loop invariant values. | ||||
BasicBlock *OrigLatch = OrigLoop->getLoopLatch(); | BasicBlock *OrigLatch = OrigLoop->getLoopLatch(); | ||||
Value *OrigLoopVal = Phi->getIncomingValueForBlock(OrigLatch); | Value *OrigLoopVal = OrigPhi->getIncomingValueForBlock(OrigLatch); | ||||
BasicBlock *VectorLoopLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch(); | BasicBlock *VectorLoopLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch(); | ||||
bool IsOrdered = State.VF.isVector() && IsInLoopReductionPhi && | bool IsOrdered = State.VF.isVector() && IsInLoopReductionPhi && | ||||
useOrderedReductions(RdxDesc); | useOrderedReductions(RdxDesc); | ||||
for (unsigned Part = 0; Part < UF; ++Part) { | for (unsigned Part = 0; Part < UF; ++Part) { | ||||
if (IsOrdered && Part > 0) | if (IsOrdered && Part > 0) | ||||
break; | break; | ||||
Value *VecRdxPhi = State.get(State.Plan->getVPValue(Phi), Part); | Value *VecRdxPhi = State.get(PhiR->getVPSingleValue(), Part); | ||||
Value *Val = State.get(State.Plan->getVPValue(OrigLoopVal), Part); | Value *Val = State.get(State.Plan->getVPValue(OrigLoopVal), Part); | ||||
if (IsOrdered) | if (IsOrdered) | ||||
Val = State.get(State.Plan->getVPValue(OrigLoopVal), UF - 1); | Val = State.get(State.Plan->getVPValue(OrigLoopVal), UF - 1); | ||||
cast<PHINode>(VecRdxPhi)->addIncoming(Val, VectorLoopLatch); | cast<PHINode>(VecRdxPhi)->addIncoming(Val, VectorLoopLatch); | ||||
} | } | ||||
// Before each round, move the insertion point right between | // Before each round, move the insertion point right between | ||||
// the PHIs and the values we are going to write. | // the PHIs and the values we are going to write. | ||||
// This allows us to write both PHINodes and the extractelement | // This allows us to write both PHINodes and the extractelement | ||||
// instructions. | // instructions. | ||||
Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt()); | Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt()); | ||||
setDebugLocFromInst(Builder, LoopExitInst); | setDebugLocFromInst(Builder, LoopExitInst); | ||||
Type *PhiTy = Phi->getType(); | Type *PhiTy = OrigPhi->getType(); | ||||
// If tail is folded by masking, the vector value to leave the loop should be | // If tail is folded by masking, the vector value to leave the loop should be | ||||
// a Select choosing between the vectorized LoopExitInst and vectorized Phi, | // a Select choosing between the vectorized LoopExitInst and vectorized Phi, | ||||
// instead of the former. For an inloop reduction the reduction will already | // instead of the former. For an inloop reduction the reduction will already | ||||
// be predicated, and does not need to be handled here. | // be predicated, and does not need to be handled here. | ||||
if (Cost->foldTailByMasking() && !IsInLoopReductionPhi) { | if (Cost->foldTailByMasking() && !IsInLoopReductionPhi) { | ||||
for (unsigned Part = 0; Part < UF; ++Part) { | for (unsigned Part = 0; Part < UF; ++Part) { | ||||
Value *VecLoopExitInst = State.get(LoopExitInstDef, Part); | Value *VecLoopExitInst = State.get(LoopExitInstDef, Part); | ||||
Value *Sel = nullptr; | Value *Sel = nullptr; | ||||
Show All 12 Lines | for (unsigned Part = 0; Part < UF; ++Part) { | ||||
// cheaper for the select to remain in the loop than be sunk out of it, | // cheaper for the select to remain in the loop than be sunk out of it, | ||||
// and so use the select value for the phi instead of the old | // and so use the select value for the phi instead of the old | ||||
// LoopExitValue. | // LoopExitValue. | ||||
if (PreferPredicatedReductionSelect || | if (PreferPredicatedReductionSelect || | ||||
TTI->preferPredicatedReductionSelect( | TTI->preferPredicatedReductionSelect( | ||||
RdxDesc.getOpcode(), PhiTy, | RdxDesc.getOpcode(), PhiTy, | ||||
TargetTransformInfo::ReductionFlags())) { | TargetTransformInfo::ReductionFlags())) { | ||||
auto *VecRdxPhi = | auto *VecRdxPhi = | ||||
cast<PHINode>(State.get(State.Plan->getVPValue(Phi), Part)); | cast<PHINode>(State.get(PhiR->getVPSingleValue(), Part)); | ||||
VecRdxPhi->setIncomingValueForBlock( | VecRdxPhi->setIncomingValueForBlock( | ||||
LI->getLoopFor(LoopVectorBody)->getLoopLatch(), Sel); | LI->getLoopFor(LoopVectorBody)->getLoopLatch(), Sel); | ||||
} | } | ||||
} | } | ||||
} | } | ||||
// If the vector reduction can be performed in a smaller type, we truncate | // If the vector reduction can be performed in a smaller type, we truncate | ||||
// then extend the loop exit value to enable InstCombine to evaluate the | // then extend the loop exit value to enable InstCombine to evaluate the | ||||
▲ Show 20 Lines • Show All 85 Lines • ▼ Show 20 Lines | void InnerLoopVectorizer::fixReduction(VPWidenPHIRecipe *PhiR, | ||||
for (PHINode &LCSSAPhi : LoopExitBlock->phis()) | for (PHINode &LCSSAPhi : LoopExitBlock->phis()) | ||||
if (any_of(LCSSAPhi.incoming_values(), | if (any_of(LCSSAPhi.incoming_values(), | ||||
[LoopExitInst](Value *V) { return V == LoopExitInst; })) | [LoopExitInst](Value *V) { return V == LoopExitInst; })) | ||||
LCSSAPhi.addIncoming(ReducedPartRdx, LoopMiddleBlock); | LCSSAPhi.addIncoming(ReducedPartRdx, LoopMiddleBlock); | ||||
// Fix the scalar loop reduction variable with the incoming reduction sum | // Fix the scalar loop reduction variable with the incoming reduction sum | ||||
// from the vector body and from the backedge value. | // from the vector body and from the backedge value. | ||||
int IncomingEdgeBlockIdx = | int IncomingEdgeBlockIdx = | ||||
Phi->getBasicBlockIndex(OrigLoop->getLoopLatch()); | OrigPhi->getBasicBlockIndex(OrigLoop->getLoopLatch()); | ||||
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index"); | assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index"); | ||||
// Pick the other block. | // Pick the other block. | ||||
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); | int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); | ||||
Phi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi); | OrigPhi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi); | ||||
Phi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst); | OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst); | ||||
} | } | ||||
void InnerLoopVectorizer::clearReductionWrapFlags(RecurrenceDescriptor &RdxDesc, | void InnerLoopVectorizer::clearReductionWrapFlags(RecurrenceDescriptor &RdxDesc, | ||||
VPTransformState &State) { | VPTransformState &State) { | ||||
RecurKind RK = RdxDesc.getRecurrenceKind(); | RecurKind RK = RdxDesc.getRecurrenceKind(); | ||||
if (RK != RecurKind::Add && RK != RecurKind::Mul) | if (RK != RecurKind::Add && RK != RecurKind::Mul) | ||||
return; | return; | ||||
▲ Show 20 Lines • Show All 5,661 Lines • Show Last 20 Lines |
iterate over Header->phis()?