Changeset View
Changeset View
Standalone View
Standalone View
lib/Transforms/Vectorize/LoopVectorize.cpp
Show First 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | |||||
#include "llvm/ADT/MapVector.h" | #include "llvm/ADT/MapVector.h" | ||||
#include "llvm/ADT/SetVector.h" | #include "llvm/ADT/SetVector.h" | ||||
#include "llvm/ADT/SmallPtrSet.h" | #include "llvm/ADT/SmallPtrSet.h" | ||||
#include "llvm/ADT/SmallSet.h" | #include "llvm/ADT/SmallSet.h" | ||||
#include "llvm/ADT/SmallVector.h" | #include "llvm/ADT/SmallVector.h" | ||||
#include "llvm/ADT/Statistic.h" | #include "llvm/ADT/Statistic.h" | ||||
#include "llvm/ADT/StringExtras.h" | #include "llvm/ADT/StringExtras.h" | ||||
#include "llvm/Analysis/AliasAnalysis.h" | #include "llvm/Analysis/AliasAnalysis.h" | ||||
#include "llvm/Analysis/AliasSetTracker.h" | |||||
#include "llvm/Analysis/BlockFrequencyInfo.h" | #include "llvm/Analysis/BlockFrequencyInfo.h" | ||||
#include "llvm/Analysis/LoopInfo.h" | #include "llvm/Analysis/LoopInfo.h" | ||||
#include "llvm/Analysis/LoopIterator.h" | #include "llvm/Analysis/LoopIterator.h" | ||||
#include "llvm/Analysis/LoopPass.h" | #include "llvm/Analysis/LoopPass.h" | ||||
#include "llvm/Analysis/ScalarEvolution.h" | #include "llvm/Analysis/ScalarEvolution.h" | ||||
#include "llvm/Analysis/ScalarEvolutionExpander.h" | #include "llvm/Analysis/ScalarEvolutionExpander.h" | ||||
#include "llvm/Analysis/ScalarEvolutionExpressions.h" | #include "llvm/Analysis/ScalarEvolutionExpressions.h" | ||||
#include "llvm/Analysis/TargetTransformInfo.h" | #include "llvm/Analysis/TargetTransformInfo.h" | ||||
▲ Show 20 Lines • Show All 339 Lines • ▼ Show 20 Lines | protected: | ||||
/// The original loop. | /// The original loop. | ||||
Loop *OrigLoop; | Loop *OrigLoop; | ||||
/// Scev analysis to use. | /// Scev analysis to use. | ||||
ScalarEvolution *SE; | ScalarEvolution *SE; | ||||
/// Loop Info. | /// Loop Info. | ||||
LoopInfo *LI; | LoopInfo *LI; | ||||
/// Dominator Tree. | /// Dominator Tree. | ||||
DominatorTree *DT; | DominatorTree *DT; | ||||
/// Alias Analysis. | |||||
AliasAnalysis *AA; | |||||
/// Data Layout. | /// Data Layout. | ||||
const DataLayout *DL; | const DataLayout *DL; | ||||
/// Target Library Info. | /// Target Library Info. | ||||
const TargetLibraryInfo *TLI; | const TargetLibraryInfo *TLI; | ||||
/// The vectorization SIMD factor to use. Each vector will have this many | /// The vectorization SIMD factor to use. Each vector will have this many | ||||
/// vector elements. | /// vector elements. | ||||
unsigned VF; | unsigned VF; | ||||
▲ Show 20 Lines • Show All 142 Lines • ▼ Show 20 Lines | |||||
class LoopVectorizationLegality { | class LoopVectorizationLegality { | ||||
public: | public: | ||||
unsigned NumLoads; | unsigned NumLoads; | ||||
unsigned NumStores; | unsigned NumStores; | ||||
unsigned NumPredStores; | unsigned NumPredStores; | ||||
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL, | LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL, | ||||
DominatorTree *DT, TargetLibraryInfo *TLI, | DominatorTree *DT, TargetLibraryInfo *TLI, | ||||
Function *F) | AliasAnalysis *AA, Function *F) | ||||
: NumLoads(0), NumStores(0), NumPredStores(0), TheLoop(L), SE(SE), DL(DL), | : NumLoads(0), NumStores(0), NumPredStores(0), TheLoop(L), SE(SE), DL(DL), | ||||
DT(DT), TLI(TLI), TheFunction(F), Induction(nullptr), | DT(DT), TLI(TLI), AA(AA), TheFunction(F), Induction(nullptr), | ||||
WidestIndTy(nullptr), HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) { | WidestIndTy(nullptr), HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) { | ||||
} | } | ||||
/// This enum represents the kinds of reductions that we support. | /// This enum represents the kinds of reductions that we support. | ||||
enum ReductionKind { | enum ReductionKind { | ||||
RK_NoReduction, ///< Not a reduction. | RK_NoReduction, ///< Not a reduction. | ||||
RK_IntegerAdd, ///< Sum of integers. | RK_IntegerAdd, ///< Sum of integers. | ||||
RK_IntegerMult, ///< Product of integers. | RK_IntegerMult, ///< Product of integers. | ||||
▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines | struct RuntimePointerCheck { | ||||
/// Reset the state of the pointer runtime information. | /// Reset the state of the pointer runtime information. | ||||
void reset() { | void reset() { | ||||
Need = false; | Need = false; | ||||
Pointers.clear(); | Pointers.clear(); | ||||
Starts.clear(); | Starts.clear(); | ||||
Ends.clear(); | Ends.clear(); | ||||
IsWritePtr.clear(); | IsWritePtr.clear(); | ||||
DependencySetId.clear(); | DependencySetId.clear(); | ||||
AliasSetId.clear(); | |||||
} | } | ||||
/// Insert a pointer and calculate the start and end SCEVs. | /// Insert a pointer and calculate the start and end SCEVs. | ||||
void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, | void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, | ||||
unsigned DepSetId, ValueToValueMap &Strides); | unsigned DepSetId, unsigned ASId, ValueToValueMap &Strides); | ||||
/// This flag indicates if we need to add the runtime check. | /// This flag indicates if we need to add the runtime check. | ||||
bool Need; | bool Need; | ||||
/// Holds the pointers that we need to check. | /// Holds the pointers that we need to check. | ||||
SmallVector<TrackingVH<Value>, 2> Pointers; | SmallVector<TrackingVH<Value>, 2> Pointers; | ||||
/// Holds the pointer value at the beginning of the loop. | /// Holds the pointer value at the beginning of the loop. | ||||
SmallVector<const SCEV*, 2> Starts; | SmallVector<const SCEV*, 2> Starts; | ||||
/// Holds the pointer value at the end of the loop. | /// Holds the pointer value at the end of the loop. | ||||
SmallVector<const SCEV*, 2> Ends; | SmallVector<const SCEV*, 2> Ends; | ||||
/// Holds the information if this pointer is used for writing to memory. | /// Holds the information if this pointer is used for writing to memory. | ||||
SmallVector<bool, 2> IsWritePtr; | SmallVector<bool, 2> IsWritePtr; | ||||
/// Holds the id of the set of pointers that could be dependent because of a | /// Holds the id of the set of pointers that could be dependent because of a | ||||
/// shared underlying object. | /// shared underlying object. | ||||
SmallVector<unsigned, 2> DependencySetId; | SmallVector<unsigned, 2> DependencySetId; | ||||
/// Holds the id of the disjoint alias set to which this pointer belongs. | |||||
SmallVector<unsigned, 2> AliasSetId; | |||||
}; | }; | ||||
/// A struct for saving information about induction variables. | /// A struct for saving information about induction variables. | ||||
struct InductionInfo { | struct InductionInfo { | ||||
InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {} | InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {} | ||||
InductionInfo() : StartValue(nullptr), IK(IK_NoInduction) {} | InductionInfo() : StartValue(nullptr), IK(IK_NoInduction) {} | ||||
/// Start value. | /// Start value. | ||||
TrackingVH<Value> StartValue; | TrackingVH<Value> StartValue; | ||||
▲ Show 20 Lines • Show All 128 Lines • ▼ Show 20 Lines | private: | ||||
/// Scev analysis. | /// Scev analysis. | ||||
ScalarEvolution *SE; | ScalarEvolution *SE; | ||||
/// DataLayout analysis. | /// DataLayout analysis. | ||||
const DataLayout *DL; | const DataLayout *DL; | ||||
/// Dominators. | /// Dominators. | ||||
DominatorTree *DT; | DominatorTree *DT; | ||||
/// Target Library Info. | /// Target Library Info. | ||||
TargetLibraryInfo *TLI; | TargetLibraryInfo *TLI; | ||||
/// Alias analysis. | |||||
AliasAnalysis *AA; | |||||
/// Parent function | /// Parent function | ||||
Function *TheFunction; | Function *TheFunction; | ||||
// --- vectorization state --- // | // --- vectorization state --- // | ||||
/// Holds the integer induction variable. This is the counter of the | /// Holds the integer induction variable. This is the counter of the | ||||
/// loop. | /// loop. | ||||
PHINode *Induction; | PHINode *Induction; | ||||
▲ Show 20 Lines • Show All 322 Lines • ▼ Show 20 Lines | struct LoopVectorize : public FunctionPass { | ||||
ScalarEvolution *SE; | ScalarEvolution *SE; | ||||
const DataLayout *DL; | const DataLayout *DL; | ||||
LoopInfo *LI; | LoopInfo *LI; | ||||
TargetTransformInfo *TTI; | TargetTransformInfo *TTI; | ||||
DominatorTree *DT; | DominatorTree *DT; | ||||
BlockFrequencyInfo *BFI; | BlockFrequencyInfo *BFI; | ||||
TargetLibraryInfo *TLI; | TargetLibraryInfo *TLI; | ||||
AliasAnalysis *AA; | |||||
bool DisableUnrolling; | bool DisableUnrolling; | ||||
bool AlwaysVectorize; | bool AlwaysVectorize; | ||||
BlockFrequency ColdEntryFreq; | BlockFrequency ColdEntryFreq; | ||||
bool runOnFunction(Function &F) override { | bool runOnFunction(Function &F) override { | ||||
SE = &getAnalysis<ScalarEvolution>(); | SE = &getAnalysis<ScalarEvolution>(); | ||||
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); | DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); | ||||
DL = DLP ? &DLP->getDataLayout() : nullptr; | DL = DLP ? &DLP->getDataLayout() : nullptr; | ||||
LI = &getAnalysis<LoopInfo>(); | LI = &getAnalysis<LoopInfo>(); | ||||
TTI = &getAnalysis<TargetTransformInfo>(); | TTI = &getAnalysis<TargetTransformInfo>(); | ||||
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); | DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); | ||||
BFI = &getAnalysis<BlockFrequencyInfo>(); | BFI = &getAnalysis<BlockFrequencyInfo>(); | ||||
TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); | TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); | ||||
AA = &getAnalysis<AliasAnalysis>(); | |||||
// Compute some weights outside of the loop over the loops. Compute this | // Compute some weights outside of the loop over the loops. Compute this | ||||
// using a BranchProbability to re-use its scaling math. | // using a BranchProbability to re-use its scaling math. | ||||
const BranchProbability ColdProb(1, 5); // 20% | const BranchProbability ColdProb(1, 5); // 20% | ||||
ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb; | ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb; | ||||
// If the target claims to have no vector registers don't attempt | // If the target claims to have no vector registers don't attempt | ||||
// vectorization. | // vectorization. | ||||
▲ Show 20 Lines • Show All 95 Lines • ▼ Show 20 Lines | if (TC > 0u && TC < TinyTripCountVectorThreshold) { | ||||
emitOptimizationRemarkAnalysis( | emitOptimizationRemarkAnalysis( | ||||
F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), | F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), | ||||
"vectorization is not beneficial and is not explicitly forced"); | "vectorization is not beneficial and is not explicitly forced"); | ||||
return false; | return false; | ||||
} | } | ||||
} | } | ||||
// Check if it is legal to vectorize the loop. | // Check if it is legal to vectorize the loop. | ||||
LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, F); | LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F); | ||||
if (!LVL.canVectorize()) { | if (!LVL.canVectorize()) { | ||||
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); | DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); | ||||
emitMissedWarning(F, L, Hints); | emitMissedWarning(F, L, Hints); | ||||
return false; | return false; | ||||
} | } | ||||
// Use the cost model. | // Use the cost model. | ||||
LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI); | LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI); | ||||
▲ Show 20 Lines • Show All 87 Lines • ▼ Show 20 Lines | #endif /* NDEBUG */ | ||||
void getAnalysisUsage(AnalysisUsage &AU) const override { | void getAnalysisUsage(AnalysisUsage &AU) const override { | ||||
AU.addRequiredID(LoopSimplifyID); | AU.addRequiredID(LoopSimplifyID); | ||||
AU.addRequiredID(LCSSAID); | AU.addRequiredID(LCSSAID); | ||||
AU.addRequired<BlockFrequencyInfo>(); | AU.addRequired<BlockFrequencyInfo>(); | ||||
AU.addRequired<DominatorTreeWrapperPass>(); | AU.addRequired<DominatorTreeWrapperPass>(); | ||||
AU.addRequired<LoopInfo>(); | AU.addRequired<LoopInfo>(); | ||||
AU.addRequired<ScalarEvolution>(); | AU.addRequired<ScalarEvolution>(); | ||||
AU.addRequired<TargetTransformInfo>(); | AU.addRequired<TargetTransformInfo>(); | ||||
AU.addRequired<AliasAnalysis>(); | |||||
AU.addPreserved<LoopInfo>(); | AU.addPreserved<LoopInfo>(); | ||||
AU.addPreserved<DominatorTreeWrapperPass>(); | AU.addPreserved<DominatorTreeWrapperPass>(); | ||||
AU.addPreserved<AliasAnalysis>(); | |||||
} | } | ||||
}; | }; | ||||
} // end anonymous namespace | } // end anonymous namespace | ||||
//===----------------------------------------------------------------------===// | //===----------------------------------------------------------------------===// | ||||
// Implementation of LoopVectorizationLegality, InnerLoopVectorizer and | // Implementation of LoopVectorizationLegality, InnerLoopVectorizer and | ||||
Show All 39 Lines | static const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE, | ||||
} | } | ||||
// Otherwise, just return the SCEV of the original pointer. | // Otherwise, just return the SCEV of the original pointer. | ||||
return SE->getSCEV(Ptr); | return SE->getSCEV(Ptr); | ||||
} | } | ||||
void LoopVectorizationLegality::RuntimePointerCheck::insert( | void LoopVectorizationLegality::RuntimePointerCheck::insert( | ||||
ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, | ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, | ||||
ValueToValueMap &Strides) { | unsigned ASId, ValueToValueMap &Strides) { | ||||
// Get the stride replaced scev. | // Get the stride replaced scev. | ||||
const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr); | const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr); | ||||
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc); | const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc); | ||||
assert(AR && "Invalid addrec expression"); | assert(AR && "Invalid addrec expression"); | ||||
const SCEV *Ex = SE->getBackedgeTakenCount(Lp); | const SCEV *Ex = SE->getBackedgeTakenCount(Lp); | ||||
const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); | const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); | ||||
Pointers.push_back(Ptr); | Pointers.push_back(Ptr); | ||||
Starts.push_back(AR->getStart()); | Starts.push_back(AR->getStart()); | ||||
Ends.push_back(ScEnd); | Ends.push_back(ScEnd); | ||||
IsWritePtr.push_back(WritePtr); | IsWritePtr.push_back(WritePtr); | ||||
DependencySetId.push_back(DepSetId); | DependencySetId.push_back(DepSetId); | ||||
AliasSetId.push_back(ASId); | |||||
} | } | ||||
Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) { | Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) { | ||||
// We need to place the broadcast of invariant variables outside the loop. | // We need to place the broadcast of invariant variables outside the loop. | ||||
Instruction *Instr = dyn_cast<Instruction>(V); | Instruction *Instr = dyn_cast<Instruction>(V); | ||||
bool NewInstr = | bool NewInstr = | ||||
(Instr && std::find(LoopVectorBody.begin(), LoopVectorBody.end(), | (Instr && std::find(LoopVectorBody.begin(), LoopVectorBody.end(), | ||||
Instr->getParent()) != LoopVectorBody.end()); | Instr->getParent()) != LoopVectorBody.end()); | ||||
▲ Show 20 Lines • Show All 529 Lines • ▼ Show 20 Lines | for (unsigned i = 0; i < NumPointers; ++i) { | ||||
for (unsigned j = i+1; j < NumPointers; ++j) { | for (unsigned j = i+1; j < NumPointers; ++j) { | ||||
// No need to check if two readonly pointers intersect. | // No need to check if two readonly pointers intersect. | ||||
if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j]) | if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j]) | ||||
continue; | continue; | ||||
// Only need to check pointers between two different dependency sets. | // Only need to check pointers between two different dependency sets. | ||||
if (PtrRtCheck->DependencySetId[i] == PtrRtCheck->DependencySetId[j]) | if (PtrRtCheck->DependencySetId[i] == PtrRtCheck->DependencySetId[j]) | ||||
continue; | continue; | ||||
// Only need to check pointers in the same alias set. | |||||
if (PtrRtCheck->AliasSetId[i] != PtrRtCheck->AliasSetId[j]) | |||||
continue; | |||||
unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace(); | unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace(); | ||||
unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace(); | unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace(); | ||||
assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) && | assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) && | ||||
(AS1 == Ends[i]->getType()->getPointerAddressSpace()) && | (AS1 == Ends[i]->getType()->getPointerAddressSpace()) && | ||||
"Trying to bounds check pointers with different address spaces"); | "Trying to bounds check pointers with different address spaces"); | ||||
▲ Show 20 Lines • Show All 1,896 Lines • ▼ Show 20 Lines | |||||
public: | public: | ||||
/// \brief Read or write access location. | /// \brief Read or write access location. | ||||
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; | typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; | ||||
typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet; | typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet; | ||||
/// \brief Set of potential dependent memory accesses. | /// \brief Set of potential dependent memory accesses. | ||||
typedef EquivalenceClasses<MemAccessInfo> DepCandidates; | typedef EquivalenceClasses<MemAccessInfo> DepCandidates; | ||||
AccessAnalysis(const DataLayout *Dl, DepCandidates &DA) : | AccessAnalysis(const DataLayout *Dl, AliasAnalysis *AA, DepCandidates &DA) : | ||||
DL(Dl), DepCands(DA), AreAllWritesIdentified(true), | DL(Dl), AA(AA), AST(*AA), DepCands(DA), IsRTCheckNeeded(false) {} | ||||
AreAllReadsIdentified(true), IsRTCheckNeeded(false) {} | |||||
/// \brief Register a load and whether it is only read from. | /// \brief Register a load and whether it is only read from. | ||||
void addLoad(Value *Ptr, bool IsReadOnly) { | void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) { | ||||
Value *Ptr = const_cast<Value*>(Loc.Ptr); | |||||
AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.TBAATag); | |||||
Accesses.insert(MemAccessInfo(Ptr, false)); | Accesses.insert(MemAccessInfo(Ptr, false)); | ||||
if (IsReadOnly) | if (IsReadOnly) | ||||
ReadOnlyPtr.insert(Ptr); | ReadOnlyPtr.insert(Ptr); | ||||
} | } | ||||
/// \brief Register a store. | /// \brief Register a store. | ||||
void addStore(Value *Ptr) { | void addStore(AliasAnalysis::Location &Loc) { | ||||
Value *Ptr = const_cast<Value*>(Loc.Ptr); | |||||
AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.TBAATag); | |||||
Accesses.insert(MemAccessInfo(Ptr, true)); | Accesses.insert(MemAccessInfo(Ptr, true)); | ||||
} | } | ||||
/// \brief Check whether we can check the pointers at runtime for | /// \brief Check whether we can check the pointers at runtime for | ||||
/// non-intersection. | /// non-intersection. | ||||
bool canCheckPtrAtRT(LoopVectorizationLegality::RuntimePointerCheck &RtCheck, | bool canCheckPtrAtRT(LoopVectorizationLegality::RuntimePointerCheck &RtCheck, | ||||
unsigned &NumComparisons, ScalarEvolution *SE, | unsigned &NumComparisons, ScalarEvolution *SE, | ||||
Loop *TheLoop, ValueToValueMap &Strides, | Loop *TheLoop, ValueToValueMap &Strides, | ||||
bool ShouldCheckStride = false); | bool ShouldCheckStride = false); | ||||
/// \brief Goes over all memory accesses, checks whether a RT check is needed | /// \brief Goes over all memory accesses, checks whether a RT check is needed | ||||
/// and builds sets of dependent accesses. | /// and builds sets of dependent accesses. | ||||
void buildDependenceSets() { | void buildDependenceSets() { | ||||
// Process read-write pointers first. | processMemAccesses(); | ||||
processMemAccesses(false); | |||||
// Next, process read pointers. | |||||
processMemAccesses(true); | |||||
} | } | ||||
bool isRTCheckNeeded() { return IsRTCheckNeeded; } | bool isRTCheckNeeded() { return IsRTCheckNeeded; } | ||||
bool isDependencyCheckNeeded() { return !CheckDeps.empty(); } | bool isDependencyCheckNeeded() { return !CheckDeps.empty(); } | ||||
void resetDepChecks() { CheckDeps.clear(); } | void resetDepChecks() { CheckDeps.clear(); } | ||||
MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; } | MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; } | ||||
private: | private: | ||||
typedef SetVector<MemAccessInfo> PtrAccessSet; | typedef SetVector<MemAccessInfo> PtrAccessSet; | ||||
typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap; | |||||
/// \brief Go over all memory access or only the deferred ones if | /// \brief Go over all memory access and check whether runtime pointer checks | ||||
/// \p UseDeferred is true and check whether runtime pointer checks are needed | /// are needed /// and build sets of dependency check candidates. | ||||
/// and build sets of dependency check candidates. | void processMemAccesses(); | ||||
void processMemAccesses(bool UseDeferred); | |||||
/// Set of all accesses. | /// Set of all accesses. | ||||
PtrAccessSet Accesses; | PtrAccessSet Accesses; | ||||
/// Set of access to check after all writes have been processed. | |||||
PtrAccessSet DeferredAccesses; | |||||
/// Map of pointers to last access encountered. | |||||
UnderlyingObjToAccessMap ObjToLastAccess; | |||||
/// Set of accesses that need a further dependence check. | /// Set of accesses that need a further dependence check. | ||||
MemAccessInfoSet CheckDeps; | MemAccessInfoSet CheckDeps; | ||||
/// Set of pointers that are read only. | /// Set of pointers that are read only. | ||||
SmallPtrSet<Value*, 16> ReadOnlyPtr; | SmallPtrSet<Value*, 16> ReadOnlyPtr; | ||||
/// Set of underlying objects already written to. | |||||
SmallPtrSet<Value*, 16> WriteObjects; | |||||
const DataLayout *DL; | const DataLayout *DL; | ||||
AliasAnalysis *AA; | |||||
/// An alias set tracker to partition the access set by underlying object and | |||||
//intrinsic property (such as TBAA metadata). | |||||
AliasSetTracker AST; | |||||
/// Sets of potentially dependent accesses - members of one set share an | /// Sets of potentially dependent accesses - members of one set share an | ||||
/// underlying pointer. The set "CheckDeps" identfies which sets really need a | /// underlying pointer. The set "CheckDeps" identfies which sets really need a | ||||
/// dependence check. | /// dependence check. | ||||
DepCandidates &DepCands; | DepCandidates &DepCands; | ||||
bool AreAllWritesIdentified; | |||||
bool AreAllReadsIdentified; | |||||
bool IsRTCheckNeeded; | bool IsRTCheckNeeded; | ||||
}; | }; | ||||
} // end anonymous namespace | } // end anonymous namespace | ||||
/// \brief Check whether a pointer can participate in a runtime bounds check. | /// \brief Check whether a pointer can participate in a runtime bounds check. | ||||
static bool hasComputableBounds(ScalarEvolution *SE, ValueToValueMap &Strides, | static bool hasComputableBounds(ScalarEvolution *SE, ValueToValueMap &Strides, | ||||
Value *Ptr) { | Value *Ptr) { | ||||
Show All 11 Lines | static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr, | ||||
const Loop *Lp, ValueToValueMap &StridesMap); | const Loop *Lp, ValueToValueMap &StridesMap); | ||||
bool AccessAnalysis::canCheckPtrAtRT( | bool AccessAnalysis::canCheckPtrAtRT( | ||||
LoopVectorizationLegality::RuntimePointerCheck &RtCheck, | LoopVectorizationLegality::RuntimePointerCheck &RtCheck, | ||||
unsigned &NumComparisons, ScalarEvolution *SE, Loop *TheLoop, | unsigned &NumComparisons, ScalarEvolution *SE, Loop *TheLoop, | ||||
ValueToValueMap &StridesMap, bool ShouldCheckStride) { | ValueToValueMap &StridesMap, bool ShouldCheckStride) { | ||||
// Find pointers with computable bounds. We are going to use this information | // Find pointers with computable bounds. We are going to use this information | ||||
// to place a runtime bound check. | // to place a runtime bound check. | ||||
unsigned NumReadPtrChecks = 0; | |||||
unsigned NumWritePtrChecks = 0; | |||||
bool CanDoRT = true; | bool CanDoRT = true; | ||||
bool IsDepCheckNeeded = isDependencyCheckNeeded(); | bool IsDepCheckNeeded = isDependencyCheckNeeded(); | ||||
NumComparisons = 0; | |||||
// We assign a consecutive id to access from different alias sets. | |||||
// Accesses between different groups doesn't need to be checked. | |||||
unsigned ASId = 1; | |||||
for (auto &AS : AST) { | |||||
unsigned NumReadPtrChecks = 0; | |||||
unsigned NumWritePtrChecks = 0; | |||||
// We assign consecutive id to access from different dependence sets. | // We assign consecutive id to access from different dependence sets. | ||||
// Accesses within the same set don't need a runtime check. | // Accesses within the same set don't need a runtime check. | ||||
unsigned RunningDepId = 1; | unsigned RunningDepId = 1; | ||||
DenseMap<Value *, unsigned> DepSetId; | DenseMap<Value *, unsigned> DepSetId; | ||||
for (PtrAccessSet::iterator AI = Accesses.begin(), AE = Accesses.end(); | for (auto A : AS) { | ||||
AI != AE; ++AI) { | Value *Ptr = A.getValue(); | ||||
const MemAccessInfo &Access = *AI; | bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); | ||||
Value *Ptr = Access.getPointer(); | MemAccessInfo Access(Ptr, IsWrite); | ||||
bool IsWrite = Access.getInt(); | |||||
// Just add write checks if we have both. | |||||
if (!IsWrite && Accesses.count(MemAccessInfo(Ptr, true))) | |||||
continue; | |||||
if (IsWrite) | if (IsWrite) | ||||
++NumWritePtrChecks; | ++NumWritePtrChecks; | ||||
else | else | ||||
++NumReadPtrChecks; | ++NumReadPtrChecks; | ||||
if (hasComputableBounds(SE, StridesMap, Ptr) && | if (hasComputableBounds(SE, StridesMap, Ptr) && | ||||
// When we run after a failing dependency check we have to make sure we | // When we run after a failing dependency check we have to make sure we | ||||
// don't have wrapping pointers. | // don't have wrapping pointers. | ||||
(!ShouldCheckStride || | (!ShouldCheckStride || | ||||
isStridedPtr(SE, DL, Ptr, TheLoop, StridesMap) == 1)) { | isStridedPtr(SE, DL, Ptr, TheLoop, StridesMap) == 1)) { | ||||
// The id of the dependence set. | // The id of the dependence set. | ||||
unsigned DepId; | unsigned DepId; | ||||
if (IsDepCheckNeeded) { | if (IsDepCheckNeeded) { | ||||
Value *Leader = DepCands.getLeaderValue(Access).getPointer(); | Value *Leader = DepCands.getLeaderValue(Access).getPointer(); | ||||
unsigned &LeaderId = DepSetId[Leader]; | unsigned &LeaderId = DepSetId[Leader]; | ||||
if (!LeaderId) | if (!LeaderId) | ||||
LeaderId = RunningDepId++; | LeaderId = RunningDepId++; | ||||
DepId = LeaderId; | DepId = LeaderId; | ||||
} else | } else | ||||
// Each access has its own dependence set. | // Each access has its own dependence set. | ||||
DepId = RunningDepId++; | DepId = RunningDepId++; | ||||
RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, StridesMap); | RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap); | ||||
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n'); | DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n'); | ||||
} else { | } else { | ||||
CanDoRT = false; | CanDoRT = false; | ||||
} | } | ||||
} | } | ||||
if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2) | if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2) | ||||
NumComparisons = 0; // Only one dependence set. | NumComparisons += 0; // Only one dependence set. | ||||
else { | else { | ||||
NumComparisons = (NumWritePtrChecks * (NumReadPtrChecks + | NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks + | ||||
NumWritePtrChecks - 1)); | NumWritePtrChecks - 1)); | ||||
} | } | ||||
++ASId; | |||||
} | |||||
// If the pointers that we would use for the bounds comparison have different | // If the pointers that we would use for the bounds comparison have different | ||||
// address spaces, assume the values aren't directly comparable, so we can't | // address spaces, assume the values aren't directly comparable, so we can't | ||||
// use them for the runtime check. We also have to assume they could | // use them for the runtime check. We also have to assume they could | ||||
// overlap. In the future there should be metadata for whether address spaces | // overlap. In the future there should be metadata for whether address spaces | ||||
// are disjoint. | // are disjoint. | ||||
unsigned NumPointers = RtCheck.Pointers.size(); | unsigned NumPointers = RtCheck.Pointers.size(); | ||||
for (unsigned i = 0; i < NumPointers; ++i) { | for (unsigned i = 0; i < NumPointers; ++i) { | ||||
for (unsigned j = i + 1; j < NumPointers; ++j) { | for (unsigned j = i + 1; j < NumPointers; ++j) { | ||||
// Only need to check pointers between two different dependency sets. | // Only need to check pointers between two different dependency sets. | ||||
if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j]) | if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j]) | ||||
continue; | continue; | ||||
// Only need to check pointers in the same alias set. | |||||
if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j]) | |||||
continue; | |||||
Value *PtrI = RtCheck.Pointers[i]; | Value *PtrI = RtCheck.Pointers[i]; | ||||
Value *PtrJ = RtCheck.Pointers[j]; | Value *PtrJ = RtCheck.Pointers[j]; | ||||
unsigned ASi = PtrI->getType()->getPointerAddressSpace(); | unsigned ASi = PtrI->getType()->getPointerAddressSpace(); | ||||
unsigned ASj = PtrJ->getType()->getPointerAddressSpace(); | unsigned ASj = PtrJ->getType()->getPointerAddressSpace(); | ||||
if (ASi != ASj) { | if (ASi != ASj) { | ||||
DEBUG(dbgs() << "LV: Runtime check would require comparison between" | DEBUG(dbgs() << "LV: Runtime check would require comparison between" | ||||
" different address spaces\n"); | " different address spaces\n"); | ||||
return false; | return false; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
return CanDoRT; | return CanDoRT; | ||||
} | } | ||||
static bool isFunctionScopeIdentifiedObject(Value *Ptr) { | void AccessAnalysis::processMemAccesses() { | ||||
return isNoAliasArgument(Ptr) || isNoAliasCall(Ptr) || isa<AllocaInst>(Ptr); | |||||
} | |||||
void AccessAnalysis::processMemAccesses(bool UseDeferred) { | |||||
// We process the set twice: first we process read-write pointers, last we | // We process the set twice: first we process read-write pointers, last we | ||||
// process read-only pointers. This allows us to skip dependence tests for | // process read-only pointers. This allows us to skip dependence tests for | ||||
// read-only pointers. | // read-only pointers. | ||||
DEBUG(dbgs() << "LV: Processing memory accesses...\n"); | |||||
DEBUG(dbgs() << " AST: "; AST.dump()); | |||||
DEBUG(dbgs() << "LV: Accesses:\n"); | |||||
DEBUG({ | |||||
for (auto A : Accesses) | |||||
dbgs() << "\t" << *A.getPointer() << " (" << | |||||
(A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ? | |||||
"read-only" : "read")) << ")\n"; | |||||
}); | |||||
// The AliasSetTracker has nicely partitioned our pointers by metadata | |||||
// compatibility and potential for underlying-object overlap. As a result, we | |||||
// only need to check for potential pointer dependencies within each alias | |||||
// set. | |||||
for (auto &AS : AST) { | |||||
// Note that both the alias-set tracker and the alias sets themselves used | |||||
// linked lists internally and so the iteration order here is deterministic | |||||
// (matching the original instruction order within each set). | |||||
bool SetHasWrite = false; | |||||
// Map of pointers to last access encountered. | |||||
typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap; | |||||
UnderlyingObjToAccessMap ObjToLastAccess; | |||||
// Set of access to check after all writes have been processed. | |||||
PtrAccessSet DeferredAccesses; | |||||
// Iterate over each alias set twice, once to process read/write pointers, | |||||
// and then to process read-only pointers. | |||||
for (int SetIteration = 0; SetIteration < 2; ++SetIteration) { | |||||
bool UseDeferred = SetIteration > 0; | |||||
PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses; | PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses; | ||||
for (PtrAccessSet::iterator AI = S.begin(), AE = S.end(); AI != AE; ++AI) { | |||||
const MemAccessInfo &Access = *AI; | |||||
Value *Ptr = Access.getPointer(); | |||||
bool IsWrite = Access.getInt(); | |||||
for (auto A : AS) { | |||||
Value *Ptr = A.getValue(); | |||||
bool IsWrite = S.count(MemAccessInfo(Ptr, true)); | |||||
// If we're using the deferred access set, then it contains only reads. | |||||
bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite; | |||||
if (UseDeferred && !IsReadOnlyPtr) | |||||
continue; | |||||
// Otherwise, the pointer must be in the PtrAccessSet, either as a read | |||||
// or a write. | |||||
assert(((IsReadOnlyPtr && UseDeferred) || IsWrite || | |||||
S.count(MemAccessInfo(Ptr, false))) && | |||||
"Alias-set pointer not in the access set?"); | |||||
MemAccessInfo Access(Ptr, IsWrite); | |||||
DepCands.insert(Access); | DepCands.insert(Access); | ||||
// Memorize read-only pointers for later processing and skip them in the | // Memorize read-only pointers for later processing and skip them in the | ||||
// first round (they need to be checked after we have seen all write | // first round (they need to be checked after we have seen all write | ||||
// pointers). Note: we also mark pointer that are not consecutive as | // pointers). Note: we also mark pointer that are not consecutive as | ||||
// "read-only" pointers (so that we check "a[b[i]] +="). Hence, we need the | // "read-only" pointers (so that we check "a[b[i]] +="). Hence, we need | ||||
// second check for "!IsWrite". | // the second check for "!IsWrite". | ||||
bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite; | |||||
if (!UseDeferred && IsReadOnlyPtr) { | if (!UseDeferred && IsReadOnlyPtr) { | ||||
DeferredAccesses.insert(Access); | DeferredAccesses.insert(Access); | ||||
continue; | continue; | ||||
} | } | ||||
bool NeedDepCheck = false; | |||||
// Check whether there is the possibility of dependency because of | |||||
// underlying objects being the same. | |||||
typedef SmallVector<Value*, 16> ValueVector; | |||||
ValueVector TempObjects; | |||||
GetUnderlyingObjects(Ptr, TempObjects, DL); | |||||
for (ValueVector::iterator UI = TempObjects.begin(), UE = TempObjects.end(); | |||||
UI != UE; ++UI) { | |||||
Value *UnderlyingObj = *UI; | |||||
// If this is a write then it needs to be an identified object. If this a | |||||
// read and all writes (so far) are identified function scope objects we | |||||
// don't need an identified underlying object but only an Argument (the | |||||
// next write is going to invalidate this assumption if it is | |||||
// unidentified). | |||||
// This is a micro-optimization for the case where all writes are | |||||
// identified and we have one argument pointer. | |||||
// Otherwise, we do need a runtime check. | |||||
if ((IsWrite && !isFunctionScopeIdentifiedObject(UnderlyingObj)) || | |||||
(!IsWrite && (!AreAllWritesIdentified || | |||||
!isa<Argument>(UnderlyingObj)) && | |||||
!isIdentifiedObject(UnderlyingObj))) { | |||||
DEBUG(dbgs() << "LV: Found an unidentified " << | |||||
(IsWrite ? "write" : "read" ) << " ptr: " << *UnderlyingObj << | |||||
"\n"); | |||||
IsRTCheckNeeded = (IsRTCheckNeeded || | |||||
!isIdentifiedObject(UnderlyingObj) || | |||||
!AreAllReadsIdentified); | |||||
if (IsWrite) | |||||
AreAllWritesIdentified = false; | |||||
if (!IsWrite) | |||||
AreAllReadsIdentified = false; | |||||
} | |||||
// If this is a write - check other reads and writes for conflicts. If | // If this is a write - check other reads and writes for conflicts. If | ||||
// this is a read only check other writes for conflicts (but only if there | // this is a read only check other writes for conflicts (but only if | ||||
// is no other write to the ptr - this is an optimization to catch "a[i] = | // there is no other write to the ptr - this is an optimization to | ||||
// a[i] + " without having to do a dependence check). | // catch "a[i] = a[i] + " without having to do a dependence check). | ||||
if ((IsWrite || IsReadOnlyPtr) && WriteObjects.count(UnderlyingObj)) | if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) { | ||||
NeedDepCheck = true; | CheckDeps.insert(Access); | ||||
IsRTCheckNeeded = true; | |||||
} | |||||
if (IsWrite) | if (IsWrite) | ||||
WriteObjects.insert(UnderlyingObj); | SetHasWrite = true; | ||||
// Create sets of pointers connected by shared underlying objects. | // Create sets of pointers connected by a shared alias set and | ||||
// underlying object. | |||||
typedef SmallVector<Value*, 16> ValueVector; | |||||
ValueVector TempObjects; | |||||
GetUnderlyingObjects(Ptr, TempObjects, DL); | |||||
for (Value *UnderlyingObj : TempObjects) { | |||||
UnderlyingObjToAccessMap::iterator Prev = | UnderlyingObjToAccessMap::iterator Prev = | ||||
ObjToLastAccess.find(UnderlyingObj); | ObjToLastAccess.find(UnderlyingObj); | ||||
if (Prev != ObjToLastAccess.end()) | if (Prev != ObjToLastAccess.end()) | ||||
DepCands.unionSets(Access, Prev->second); | DepCands.unionSets(Access, Prev->second); | ||||
ObjToLastAccess[UnderlyingObj] = Access; | ObjToLastAccess[UnderlyingObj] = Access; | ||||
} | } | ||||
} | |||||
if (NeedDepCheck) | } | ||||
CheckDeps.insert(Access); | |||||
} | } | ||||
} | } | ||||
namespace { | namespace { | ||||
/// \brief Checks memory dependences among accesses to the same underlying | /// \brief Checks memory dependences among accesses to the same underlying | ||||
/// object to determine whether there vectorization is legal or not (and at | /// object to determine whether there vectorization is legal or not (and at | ||||
/// which vectorization factor). | /// which vectorization factor). | ||||
/// | /// | ||||
▲ Show 20 Lines • Show All 243 Lines • ▼ Show 20 Lines | bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, | ||||
Value *BPtr = B.getPointer(); | Value *BPtr = B.getPointer(); | ||||
bool AIsWrite = A.getInt(); | bool AIsWrite = A.getInt(); | ||||
bool BIsWrite = B.getInt(); | bool BIsWrite = B.getInt(); | ||||
// Two reads are independent. | // Two reads are independent. | ||||
if (!AIsWrite && !BIsWrite) | if (!AIsWrite && !BIsWrite) | ||||
return false; | return false; | ||||
// We cannot check pointers in different address spaces. | |||||
if (APtr->getType()->getPointerAddressSpace() != | |||||
BPtr->getType()->getPointerAddressSpace()) | |||||
return true; | |||||
const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr); | const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr); | ||||
const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr); | const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr); | ||||
int StrideAPtr = isStridedPtr(SE, DL, APtr, InnermostLoop, Strides); | int StrideAPtr = isStridedPtr(SE, DL, APtr, InnermostLoop, Strides); | ||||
int StrideBPtr = isStridedPtr(SE, DL, BPtr, InnermostLoop, Strides); | int StrideBPtr = isStridedPtr(SE, DL, BPtr, InnermostLoop, Strides); | ||||
const SCEV *Src = AScev; | const SCEV *Src = AScev; | ||||
const SCEV *Sink = BScev; | const SCEV *Sink = BScev; | ||||
▲ Show 20 Lines • Show All 214 Lines • ▼ Show 20 Lines | bool LoopVectorizationLegality::canVectorizeMemory() { | ||||
// Check if we see any stores. If there are no stores, then we don't | // Check if we see any stores. If there are no stores, then we don't | ||||
// care if the pointers are *restrict*. | // care if the pointers are *restrict*. | ||||
if (!Stores.size()) { | if (!Stores.size()) { | ||||
DEBUG(dbgs() << "LV: Found a read-only loop!\n"); | DEBUG(dbgs() << "LV: Found a read-only loop!\n"); | ||||
return true; | return true; | ||||
} | } | ||||
AccessAnalysis::DepCandidates DependentAccesses; | AccessAnalysis::DepCandidates DependentAccesses; | ||||
AccessAnalysis Accesses(DL, DependentAccesses); | AccessAnalysis Accesses(DL, AA, DependentAccesses); | ||||
// Holds the analyzed pointers. We don't want to call GetUnderlyingObjects | // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects | ||||
// multiple times on the same object. If the ptr is accessed twice, once | // multiple times on the same object. If the ptr is accessed twice, once | ||||
// for read and once for write, it will only appear once (on the write | // for read and once for write, it will only appear once (on the write | ||||
// list). This is okay, since we are going to check for conflicts between | // list). This is okay, since we are going to check for conflicts between | ||||
// writes and between reads and writes, but not between reads and reads. | // writes and between reads and writes, but not between reads and reads. | ||||
ValueSet Seen; | ValueSet Seen; | ||||
Show All 9 Lines | if (isUniform(Ptr)) { | ||||
DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n"); | DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n"); | ||||
return false; | return false; | ||||
} | } | ||||
// If we did *not* see this pointer before, insert it to the read-write | // If we did *not* see this pointer before, insert it to the read-write | ||||
// list. At this phase it is only a 'write' list. | // list. At this phase it is only a 'write' list. | ||||
if (Seen.insert(Ptr)) { | if (Seen.insert(Ptr)) { | ||||
++NumReadWrites; | ++NumReadWrites; | ||||
Accesses.addStore(Ptr); | |||||
AliasAnalysis::Location Loc = AA->getLocation(ST); | |||||
// The TBAA metadata could have a control dependency on the predication | |||||
// condition, so we cannot rely on it when determining whether or not we | |||||
// need runtime pointer checks. | |||||
if (blockNeedsPredication(ST->getParent())) | |||||
Loc.TBAATag = nullptr; | |||||
Accesses.addStore(Loc); | |||||
} | } | ||||
} | } | ||||
if (IsAnnotatedParallel) { | if (IsAnnotatedParallel) { | ||||
DEBUG(dbgs() | DEBUG(dbgs() | ||||
<< "LV: A loop annotated parallel, ignore memory dependency " | << "LV: A loop annotated parallel, ignore memory dependency " | ||||
<< "checks.\n"); | << "checks.\n"); | ||||
return true; | return true; | ||||
Show All 10 Lines | for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) { | ||||
// If the address of i is unknown (for example A[B[i]]) then we may | // If the address of i is unknown (for example A[B[i]]) then we may | ||||
// read a few words, modify, and write a few words, and some of the | // read a few words, modify, and write a few words, and some of the | ||||
// words may be written to the same address. | // words may be written to the same address. | ||||
bool IsReadOnlyPtr = false; | bool IsReadOnlyPtr = false; | ||||
if (Seen.insert(Ptr) || !isStridedPtr(SE, DL, Ptr, TheLoop, Strides)) { | if (Seen.insert(Ptr) || !isStridedPtr(SE, DL, Ptr, TheLoop, Strides)) { | ||||
++NumReads; | ++NumReads; | ||||
IsReadOnlyPtr = true; | IsReadOnlyPtr = true; | ||||
} | } | ||||
Accesses.addLoad(Ptr, IsReadOnlyPtr); | |||||
AliasAnalysis::Location Loc = AA->getLocation(LD); | |||||
// The TBAA metadata could have a control dependency on the predication | |||||
// condition, so we cannot rely on it when determining whether or not we | |||||
// need runtime pointer checks. | |||||
if (blockNeedsPredication(LD->getParent())) | |||||
Loc.TBAATag = nullptr; | |||||
Accesses.addLoad(Loc, IsReadOnlyPtr); | |||||
} | } | ||||
// If we write (or read-write) to a single destination and there are no | // If we write (or read-write) to a single destination and there are no | ||||
// other reads in this loop then is it safe to vectorize. | // other reads in this loop then is it safe to vectorize. | ||||
if (NumReadWrites == 1 && NumReads == 0) { | if (NumReadWrites == 1 && NumReads == 0) { | ||||
DEBUG(dbgs() << "LV: Found a write-only loop!\n"); | DEBUG(dbgs() << "LV: Found a write-only loop!\n"); | ||||
return true; | return true; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 1,168 Lines • ▼ Show 20 Lines | if (Scalar->isVoidTy() || VF == 1) | ||||
return Scalar; | return Scalar; | ||||
return VectorType::get(Scalar, VF); | return VectorType::get(Scalar, VF); | ||||
} | } | ||||
char LoopVectorize::ID = 0; | char LoopVectorize::ID = 0; | ||||
static const char lv_name[] = "Loop Vectorization"; | static const char lv_name[] = "Loop Vectorization"; | ||||
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false) | INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false) | ||||
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) | INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) | ||||
INITIALIZE_AG_DEPENDENCY(AliasAnalysis) | |||||
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo) | INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo) | ||||
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) | ||||
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) | INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) | ||||
INITIALIZE_PASS_DEPENDENCY(LCSSA) | INITIALIZE_PASS_DEPENDENCY(LCSSA) | ||||
INITIALIZE_PASS_DEPENDENCY(LoopInfo) | INITIALIZE_PASS_DEPENDENCY(LoopInfo) | ||||
INITIALIZE_PASS_DEPENDENCY(LoopSimplify) | INITIALIZE_PASS_DEPENDENCY(LoopSimplify) | ||||
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) | INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) | ||||
▲ Show 20 Lines • Show All 153 Lines • Show Last 20 Lines |