Please use GitHub pull requests for new patches. Avoid migrating existing patches. Phabricator shutdown timeline
Changeset View
Changeset View
Standalone View
Standalone View
llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 957 Lines • ▼ Show 20 Lines | public: | ||||
/// groups. Substitute symbolic strides using \p Strides. | /// groups. Substitute symbolic strides using \p Strides. | ||||
void analyzeInterleaving(const ValueToValueMap &Strides); | void analyzeInterleaving(const ValueToValueMap &Strides); | ||||
/// \brief Check if \p Instr belongs to any interleave group. | /// \brief Check if \p Instr belongs to any interleave group. | ||||
bool isInterleaved(Instruction *Instr) const { | bool isInterleaved(Instruction *Instr) const { | ||||
return InterleaveGroupMap.count(Instr); | return InterleaveGroupMap.count(Instr); | ||||
} | } | ||||
/// \brief Return the maximum interleave factor of all interleaved groups. | |||||
unsigned getMaxInterleaveFactor() const { | |||||
unsigned MaxFactor = 1; | |||||
for (auto &Entry : InterleaveGroupMap) | |||||
MaxFactor = std::max(MaxFactor, Entry.second->getFactor()); | |||||
return MaxFactor; | |||||
} | |||||
/// \brief Get the interleave group that \p Instr belongs to. | /// \brief Get the interleave group that \p Instr belongs to. | ||||
/// | /// | ||||
/// \returns nullptr if \p Instr doesn't belong to any such group. | /// \returns nullptr if \p Instr doesn't belong to any such group. | ||||
InterleaveGroup *getInterleaveGroup(Instruction *Instr) const { | InterleaveGroup *getInterleaveGroup(Instruction *Instr) const { | ||||
if (InterleaveGroupMap.count(Instr)) | if (InterleaveGroupMap.count(Instr)) | ||||
return InterleaveGroupMap.find(Instr)->second; | return InterleaveGroupMap.find(Instr)->second; | ||||
return nullptr; | return nullptr; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 566 Lines • ▼ Show 20 Lines | public: | ||||
const LoopAccessInfo *getLAI() const { return LAI; } | const LoopAccessInfo *getLAI() const { return LAI; } | ||||
/// \brief Check if \p Instr belongs to any interleaved access group. | /// \brief Check if \p Instr belongs to any interleaved access group. | ||||
bool isAccessInterleaved(Instruction *Instr) { | bool isAccessInterleaved(Instruction *Instr) { | ||||
return InterleaveInfo.isInterleaved(Instr); | return InterleaveInfo.isInterleaved(Instr); | ||||
} | } | ||||
/// \brief Return the maximum interleave factor of all interleaved groups. | |||||
unsigned getMaxInterleaveFactor() const { | |||||
return InterleaveInfo.getMaxInterleaveFactor(); | |||||
} | |||||
/// \brief Get the interleaved access group that \p Instr belongs to. | /// \brief Get the interleaved access group that \p Instr belongs to. | ||||
const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) { | const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) { | ||||
return InterleaveInfo.getInterleaveGroup(Instr); | return InterleaveInfo.getInterleaveGroup(Instr); | ||||
} | } | ||||
/// \brief Returns true if an interleaved group requires a scalar iteration | /// \brief Returns true if an interleaved group requires a scalar iteration | ||||
/// to handle accesses with gaps. | /// to handle accesses with gaps. | ||||
bool requiresScalarEpilogue() const { | bool requiresScalarEpilogue() const { | ||||
return InterleaveInfo.requiresScalarEpilogue(); | return InterleaveInfo.requiresScalarEpilogue(); | ||||
} | } | ||||
unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } | unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } | ||||
uint64_t getMaxSafeRegisterWidth() const { | |||||
return LAI->getDepChecker().getMaxSafeRegisterWidth(); | |||||
} | |||||
bool hasStride(Value *V) { return LAI->hasStride(V); } | bool hasStride(Value *V) { return LAI->hasStride(V); } | ||||
/// Returns true if the target machine supports masked store operation | /// Returns true if the target machine supports masked store operation | ||||
/// for the given \p DataType and kind of access to \p Ptr. | /// for the given \p DataType and kind of access to \p Ptr. | ||||
bool isLegalMaskedStore(Type *DataType, Value *Ptr) { | bool isLegalMaskedStore(Type *DataType, Value *Ptr) { | ||||
return isConsecutivePtr(Ptr) && TTI->isLegalMaskedStore(DataType); | return isConsecutivePtr(Ptr) && TTI->isLegalMaskedStore(DataType); | ||||
} | } | ||||
/// Returns true if the target machine supports masked load operation | /// Returns true if the target machine supports masked load operation | ||||
▲ Show 20 Lines • Show All 4,490 Lines • ▼ Show 20 Lines | for (auto AI = std::next(BI); AI != E; ++AI) { | ||||
if (A->mayReadFromMemory()) | if (A->mayReadFromMemory()) | ||||
Group->setInsertPos(A); | Group->setInsertPos(A); | ||||
} | } | ||||
} // Iteration over A accesses. | } // Iteration over A accesses. | ||||
} // Iteration over B accesses. | } // Iteration over B accesses. | ||||
// Remove interleaved store groups with gaps. | // Remove interleaved store groups with gaps. | ||||
for (InterleaveGroup *Group : StoreGroups) | for (InterleaveGroup *Group : StoreGroups) | ||||
if (Group->getNumMembers() != Group->getFactor()) | if (Group->getNumMembers() != Group->getFactor()) { | ||||
DEBUG(dbgs() << "LV: Invalidate candidate interleaved store group due " | |||||
"to gaps.\n"); | |||||
releaseGroup(Group); | releaseGroup(Group); | ||||
} | |||||
// Remove interleaved groups with gaps (currently only loads) whose memory | // Remove interleaved groups with gaps (currently only loads) whose memory | ||||
// accesses may wrap around. We have to revisit the getPtrStride analysis, | // accesses may wrap around. We have to revisit the getPtrStride analysis, | ||||
// this time with ShouldCheckWrap=true, since collectConstStrideAccesses does | // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does | ||||
// not check wrapping (see documentation there). | // not check wrapping (see documentation there). | ||||
// FORNOW we use Assume=false; | // FORNOW we use Assume=false; | ||||
// TODO: Change to Assume=true but making sure we don't exceed the threshold | // TODO: Change to Assume=true but making sure we don't exceed the threshold | ||||
// of runtime SCEV assumptions checks (thereby potentially failing to | // of runtime SCEV assumptions checks (thereby potentially failing to | ||||
// vectorize altogether). | // vectorize altogether). | ||||
Show All 36 Lines | if (LastMember) { | ||||
} | } | ||||
} else { | } else { | ||||
// Case 3: A non-reversed interleaved load group with gaps: We need | // Case 3: A non-reversed interleaved load group with gaps: We need | ||||
// to execute at least one scalar epilogue iteration. This will ensure | // to execute at least one scalar epilogue iteration. This will ensure | ||||
// we don't speculatively access memory out-of-bounds. We only need | // we don't speculatively access memory out-of-bounds. We only need | ||||
// to look for a member at index factor - 1, since every group must have | // to look for a member at index factor - 1, since every group must have | ||||
// a member at index zero. | // a member at index zero. | ||||
if (Group->isReverse()) { | if (Group->isReverse()) { | ||||
DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to " | |||||
"a reverse access with gaps.\n"); | |||||
releaseGroup(Group); | releaseGroup(Group); | ||||
continue; | continue; | ||||
} | } | ||||
DEBUG(dbgs() << "LV: Interleaved group requires epilogue iteration.\n"); | DEBUG(dbgs() << "LV: Interleaved group requires epilogue iteration.\n"); | ||||
RequiresScalarEpilogue = true; | RequiresScalarEpilogue = true; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
▲ Show 20 Lines • Show All 67 Lines • ▼ Show 20 Lines | |||||
unsigned | unsigned | ||||
LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize, | LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize, | ||||
unsigned ConstTripCount) { | unsigned ConstTripCount) { | ||||
MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI); | MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI); | ||||
unsigned SmallestType, WidestType; | unsigned SmallestType, WidestType; | ||||
std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes(); | std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes(); | ||||
unsigned WidestRegister = TTI.getRegisterBitWidth(true); | unsigned WidestRegister = TTI.getRegisterBitWidth(true); | ||||
unsigned MaxSafeDepDist = -1U; | |||||
// Get the maximum safe dependence distance in bits computed by LAA. If the | // Get the maximum safe dependence distance in bits computed by LAA. | ||||
// loop contains any interleaved accesses, we divide the dependence distance | // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from | ||||
// by the maximum interleave factor of all interleaved groups. Note that | // the memory access that is most restrictive (involved in the smallest | ||||
// although the division ensures correctness, this is a fairly conservative | // dependence distance). | ||||
// computation because the maximum distance computed by LAA may not involve | unsigned MaxSafeRegisterWidth = Legal->getMaxSafeRegisterWidth(); | ||||
// any of the interleaved accesses. | |||||
if (Legal->getMaxSafeDepDistBytes() != -1U) | WidestRegister = std::min(WidestRegister, MaxSafeRegisterWidth); | ||||
MaxSafeDepDist = | |||||
Legal->getMaxSafeDepDistBytes() * 8 / Legal->getMaxInterleaveFactor(); | |||||
WidestRegister = | |||||
((WidestRegister < MaxSafeDepDist) ? WidestRegister : MaxSafeDepDist); | |||||
unsigned MaxVectorSize = WidestRegister / WidestType; | unsigned MaxVectorSize = WidestRegister / WidestType; | ||||
DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType << " / " | DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType << " / " | ||||
<< WidestType << " bits.\n"); | << WidestType << " bits.\n"); | ||||
DEBUG(dbgs() << "LV: The Widest register is: " << WidestRegister | DEBUG(dbgs() << "LV: The Widest register safe to use is: " << WidestRegister | ||||
<< " bits.\n"); | << " bits.\n"); | ||||
assert(MaxVectorSize <= 64 && "Did not expect to pack so many elements" | assert(MaxVectorSize <= 64 && "Did not expect to pack so many elements" | ||||
" into one vector!"); | " into one vector!"); | ||||
if (MaxVectorSize == 0) { | if (MaxVectorSize == 0) { | ||||
DEBUG(dbgs() << "LV: The target has no vector registers.\n"); | DEBUG(dbgs() << "LV: The target has no vector registers.\n"); | ||||
MaxVectorSize = 1; | MaxVectorSize = 1; | ||||
return MaxVectorSize; | return MaxVectorSize; | ||||
▲ Show 20 Lines • Show All 2,496 Lines • Show Last 20 Lines |