llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ (first 41 lines not shown) @@
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
 #include <cstdlib>
 #include <iterator>
 #include <utility>
 #include <vector>

 using namespace llvm;
+using namespace llvm::PatternMatch;

 #define DEBUG_TYPE "loop-accesses"

 static cl::opt<unsigned, true>
     VectorizationFactor("force-vector-width", cl::Hidden,
                         cl::desc("Sets the SIMD width. Zero is autoselect."),
                         cl::location(VectorizerParams::VectorizationFactor));
 unsigned VectorizerParams::VectorizationFactor;
@@ (107 lines not shown) @@
 /// N is a calculated back-edge taken count:
 ///     N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0
 /// Start and End points are calculated in the following way:
 ///     Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
 /// where SizeOfElt is the size of a single memory access in bytes.
 ///
 /// There is no conflict when the intervals are disjoint:
 ///     NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
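A minimal reading of the formulas above, sketched on plain integer addresses (editor's illustration; PtrInterval, makeInterval and noConflict are made-up names, not LAA types):

#include <algorithm>
#include <cstdint>

struct PtrInterval {
  uint64_t Start; // UMIN(A, B)
  uint64_t End;   // UMAX(A, B) + SizeOfElt
};

// Build the access interval for the pointer pair (A, B) covering an element
// of SizeOfElt bytes, exactly as the comment above describes.
static PtrInterval makeInterval(uint64_t A, uint64_t B, uint64_t SizeOfElt) {
  return {std::min(A, B), std::max(A, B) + SizeOfElt};
}

// No runtime conflict when the two accessed ranges are disjoint.
static bool noConflict(const PtrInterval &P1, const PtrInterval &P2) {
  return P2.Start >= P1.End || P1.Start >= P2.End;
}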
-void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
-                                    unsigned DepSetId, unsigned ASId,
-                                    const ValueToValueMap &Strides,
+void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
+                                    bool WritePtr, unsigned DepSetId,
+                                    unsigned ASId,
                                     PredicatedScalarEvolution &PSE) {
-  // Get the stride replaced scev.
-  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+  const SCEV *Sc = PtrExpr;
   ScalarEvolution *SE = PSE.getSE();
   const SCEV *ScStart;
   const SCEV *ScEnd;
   if (SE->isLoopInvariant(Sc, Lp)) {
     ScStart = ScEnd = Sc;
   } else {

@@ (160 lines not shown) @@ void RuntimePointerChecking::groupChecks(
   if (!UseDependencies) {
     for (unsigned I = 0; I < Pointers.size(); ++I)
       CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
     return;
   }

   unsigned TotalComparisons = 0;

-  DenseMap<Value *, unsigned> PositionMap;
-  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
-    PositionMap[Pointers[Index].PointerValue] = Index;
+  DenseMap<Value *, SmallVector<unsigned>> PositionMap;
+  for (unsigned Index = 0; Index < Pointers.size(); ++Index) {
+    auto Iter = PositionMap.insert({Pointers[Index].PointerValue, {}});
+    Iter.first->second.push_back(Index);
+  }

   // We need to keep track of what pointers we've already seen so we
   // don't process them twice.
   SmallSet<unsigned, 2> Seen;

   // Go through all equivalence classes, get the "pointer check groups"
   // and add them to the overall solution. We use the order in which accesses
   // appear in 'Pointers' to enforce determinism.

@@ (14 lines not shown) @@ for (unsigned I = 0; I < Pointers.size(); ++I) {
     // iteration order within an equivalence class member is only dependent on
     // the order in which unions and insertions are performed on the
     // equivalence class, the iteration order is deterministic.
     for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
          MI != ME; ++MI) {
       auto PointerI = PositionMap.find(MI->getPointer());
       assert(PointerI != PositionMap.end() &&
              "pointer in equivalence class not found in PositionMap");
-      unsigned Pointer = PointerI->second;
+      for (unsigned Pointer : PointerI->second) {
         bool Merged = false;
         // Mark this pointer as seen.
         Seen.insert(Pointer);

         // Go through all the existing sets and see if we can find one
         // which can include this pointer.
         for (RuntimeCheckingPtrGroup &Group : Groups) {
           // Don't perform more than a certain amount of comparisons.
           // This should limit the cost of grouping the pointers to something
           // reasonable. If we do end up hitting this threshold, the algorithm
           // will create separate groups for all remaining pointers.
           if (TotalComparisons > MemoryCheckMergeThreshold)
             break;

           TotalComparisons++;

           if (Group.addPointer(Pointer, *this)) {
             Merged = true;
             break;
           }
         }

         if (!Merged)
           // We couldn't add this pointer to any existing set or the threshold
           // for the number of comparisons has been reached. Create a new group
           // to hold the current pointer.
           Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
       }
+    }

     // We've computed the grouped checks for this partition.
     // Save the results and continue with the next one.
     llvm::copy(Groups, std::back_inserter(CheckingGroups));
   }
 }
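The map change above pairs with the pointer forking introduced later in this patch: a single IR pointer value can now be inserted into Pointers more than once (once per translated SCEV), so grouping has to visit every index recorded for that value. A minimal sketch of the insert-or-append idiom, using standard containers instead of the LLVM ADTs so it stands alone (names are illustrative):

#include <map>
#include <vector>

// Collect every slot index recorded for a key that may be seen repeatedly.
std::map<const void *, std::vector<unsigned>> Positions;

void record(const void *Key, unsigned Index) {
  // insert() is a no-op when Key already exists; either way it returns an
  // iterator to the mapped vector, which we append to.
  auto It = Positions.insert({Key, {}});
  It.first->second.push_back(Index);
}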
 bool RuntimePointerChecking::arePointersInSamePartition(

@@ (182 lines not shown) @@ private:
   PredicatedScalarEvolution &PSE;
 };

 } // end anonymous namespace

 /// Check whether a pointer can participate in a runtime bounds check.
 /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
 /// by adding run-time checks (overflow checks) if necessary.
-static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
-                                const ValueToValueMap &Strides, Value *Ptr,
-                                Loop *L, bool Assume) {
-  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr,
+                                const SCEV *PtrScev, Loop *L, bool Assume) {
   // The bounds for a loop-invariant pointer are trivial.
   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
     return true;

   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);

   if (!AR && Assume)
     AR = PSE.getAsAddRec(Ptr);

@@ (46 lines not shown) @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
                                           MemAccessInfo Access,
                                           const ValueToValueMap &StridesMap,
                                           DenseMap<Value *, unsigned> &DepSetId,
                                           Loop *TheLoop, unsigned &RunningDepId,
                                           unsigned ASId, bool ShouldCheckWrap,
                                           bool Assume) {
   Value *Ptr = Access.getPointer();

-  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
-    return false;
+  auto TranslatePointers = [&](Value *Ptr) -> SmallVector<const SCEV *> {
+    ScalarEvolution &SE = *PSE.getSE();
+    auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+    auto AssumeInBoundsFlags = [&]() {
+      if (!GEP->isInBounds())
+        return false;
+
+      // We'd like to propagate flags from the IR to the corresponding
+      // SCEV nodes, but to do that, we have to ensure that said flag is
+      // valid in the entire defined scope of the SCEV.
+      auto *GEPI = dyn_cast<Instruction>(GEP);
+      // TODO: non-instructions have global scope. We might be able to
+      // prove some global scope cases.
+      return GEPI && programUndefinedIfPoison(GEPI);
+    };
+
+    if (GEP && GEP->getNumOperands() == 2) {
+      if (auto *SI = dyn_cast<SelectInst>(GEP->getOperand(0))) {
+        const SCEV *BaseA = SE.getSCEV(SI->getOperand(1));
+        const SCEV *BaseB = SE.getSCEV(SI->getOperand(2));
+        const SCEV *Offset = SE.getSCEV(GEP->getOperand(1));
+        if (SE.getTypeSizeInBits(Offset->getType()) <
+            SE.getTypeSizeInBits(BaseA->getType()))
+          Offset = SE.getSignExtendExpr(
+              Offset, SE.getEffectiveSCEVType(BaseA->getType()));
+        SCEV::NoWrapFlags OffsetWrap =
+            AssumeInBoundsFlags() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+
+        Type *IntIdxTy = SE.getEffectiveSCEVType(BaseA->getType());
+        auto *CurTy = GEP->getSourceElementType();
+        const SCEV *ElementSize = SE.getSizeOfExpr(IntIdxTy, CurTy);
+        // Getelementptr indices are signed.
+        Offset = SE.getTruncateOrSignExtend(Offset, IntIdxTy);
+        // Multiply the index by the element size to compute the element
+        // offset.
+        Offset = SE.getMulExpr(Offset, ElementSize, OffsetWrap);
+        auto *PtrA = SE.getAddExpr(BaseA, Offset, SCEV::FlagNUW);
+        auto *PtrB = SE.getAddExpr(BaseB, Offset, SCEV::FlagNUW);
+        return {PtrA, PtrB};
+      } else if (match(GEP->getOperand(1),
+                       m_ZExt(m_Select(m_Value(), m_Value(), m_Value())))) {
+        auto *ZExt = cast<ZExtInst>(GEP->getOperand(1));
+        auto *SI = cast<SelectInst>(ZExt->getOperand(0));
+        const SCEV *OffsetA = SE.getSCEV(SI->getOperand(1));
+        const SCEV *OffsetB = SE.getSCEV(SI->getOperand(2));
+        SCEV::NoWrapFlags OffsetWrap =
+            AssumeInBoundsFlags() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+        auto *Base = SE.getSCEV(GEP->getOperand(0));
+        Type *IntIdxTy = SE.getEffectiveSCEVType(Base->getType());
+        auto *CurTy = GEP->getSourceElementType();
+        const SCEV *ElementSize = SE.getSizeOfExpr(IntIdxTy, CurTy);
+        // Getelementptr indices are signed.
+        OffsetA = SE.getTruncateOrSignExtend(OffsetA, IntIdxTy);
+        OffsetB = SE.getTruncateOrSignExtend(OffsetB, IntIdxTy);
+        // Multiply the index by the element size to compute the element
+        // offset.
+        OffsetA = SE.getMulExpr(OffsetA, ElementSize, OffsetWrap);
+        OffsetB = SE.getMulExpr(OffsetB, ElementSize, OffsetWrap);
+        auto *PtrA = SE.getAddExpr(Base, OffsetA, SCEV::FlagNUW);
+        auto *PtrB = SE.getAddExpr(Base, OffsetB, SCEV::FlagNUW);
+        return {PtrA, PtrB};
+      }
+    }
+
+    return {replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr)};
+  };
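  // Illustration (editor's note, not part of the patch): for IR roughly of
  // the form
  //   %sel = select i1 %c, float* %a, float* %b
  //   %gep = getelementptr inbounds float, float* %sel, i64 %iv
  // the lambda above returns two candidate SCEVs, one per possible base
  // (%a and %b), so that bounds can later be emitted for both. A zext'd
  // select used as the GEP index is handled the same way, producing two
  // offsets over a single base.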
+  SmallVector<const SCEV *> TranslatedPtrs = TranslatePointers(Ptr);
+
+  for (const SCEV *PtrExpr : TranslatedPtrs) {
+    if (!hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume))
+      return false;
+
     // When we run after a failing dependency check we have to make sure
     // we don't have wrapping pointers.
-  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
+    if (ShouldCheckWrap) {
+      if (TranslatedPtrs.size() > 1) {
+        return false;
+      }
+
+      if (!isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
         auto *Expr = PSE.getSCEV(Ptr);
         if (!Assume || !isa<SCEVAddRecExpr>(Expr))
           return false;
         PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
       }
+    }
+
+    // If there's only one option for Ptr, look it up after bounds and wrap
+    // checking, because assumptions might have been added to PSE.
+    if (TranslatedPtrs.size() == 1)
+      TranslatedPtrs[0] = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
+  }
+
+  for (const SCEV *PtrExpr : TranslatedPtrs) {
     // The id of the dependence set.
     unsigned DepId;

     if (isDependencyCheckNeeded()) {
       Value *Leader = DepCands.getLeaderValue(Access).getPointer();
       unsigned &LeaderId = DepSetId[Leader];
       if (!LeaderId)
         LeaderId = RunningDepId++;
       DepId = LeaderId;
     } else
       // Each access has its own dependence set.
       DepId = RunningDepId++;

     bool IsWrite = Access.getInt();
-  RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
+    RtCheck.insert(TheLoop, Ptr, PtrExpr, IsWrite, DepId, ASId, PSE);
     LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+  }
+
   return true;
 }
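For context, a small loop of the shape the TranslatePointers handling above is aimed at (editor's sketch in C-style C++; the function and variable names are made up and the example is not taken from the patch or its tests). The store address is a GEP whose base is a select between two pointers, so one pointer value yields two candidate bounds; the second matched form, a zext'd select used as the GEP index (roughly a[c ? i : k]), is handled analogously with two offsets over one base.

// Sketch only (illustrative names): the store address is a GEP of a select,
// so vectorizing requires runtime checks against both possible bases.
void saxpy_select(float *a, float *b, const float *src, const int *cond,
                  int n) {
  for (int i = 0; i < n; i++) {
    float *dst = cond[i] ? a : b; // select between two loop-invariant bases
    dst[i] = src[i] + 1.0f;       // getelementptr of the select, index i
  }
}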
 bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
                                      ScalarEvolution *SE, Loop *TheLoop,
                                      const ValueToValueMap &StridesMap,
                                      bool ShouldCheckWrap) {

@@ (last 1,632 lines not shown) @@