Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -5489,6 +5489,22 @@ !0 = !{!"llvm.loop.vectorize.predicate.enable", i1 0} !1 = !{!"llvm.loop.vectorize.predicate.enable", i1 1} +'``llvm.loop.vectorize.ivdep.enable``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This metadata indicates to the vectorizer to ignore dependencies between +memory accesses which have not been determined to be either safe or unsafe +for vectorization. This differs from ``llvm.loop.parallel_access``, which +considers no dependencies to be present between memory accesses belonging +to the same access group. The first operand is the string +``llvm.loop.vectorize.ivdep.enable`` and the second operand is a bit. A +value of 1 implies that the functionality of this metadata is enabled for +the loop. + +.. code-block:: llvm + + !0 = !{!"llvm.loop.vectorize.ivdep.enable", i1 1} + '``llvm.loop.vectorize.width``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Index: llvm/include/llvm/Analysis/LoopAccessAnalysis.h =================================================================== --- llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -201,7 +201,7 @@ /// /// Only checks sets with elements in \p CheckDeps. bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, - const ValueToValueMap &Strides); + const ValueToValueMap &Strides, bool UnknownDepHint); /// No memory dependence was encountered that would inhibit /// vectorization. @@ -516,7 +516,8 @@ class LoopAccessInfo { public: LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, - AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI); + AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, + bool UnknownDepHint = false); /// Return true we can analyze the memory accesses in the loop and there are /// no memory dependence cycles. @@ -608,7 +609,8 @@ private: /// Analyze the loop. void analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, - const TargetLibraryInfo *TLI, DominatorTree *DT); + const TargetLibraryInfo *TLI, DominatorTree *DT, + bool UnknownDepHint); /// Check if the structure of the loop allows it to be analyzed by this /// pass. @@ -735,7 +737,7 @@ /// Query the result of the loop access information for the loop \p L. /// /// If there is no cached result available run the analysis. - const LoopAccessInfo &getInfo(Loop *L); + const LoopAccessInfo &getInfo(Loop *L, bool UnknownDepHint = false); void releaseMemory() override { // Invalidate the cache when the pass is freed. Index: llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h =================================================================== --- llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -44,7 +44,7 @@ /// careful NOT to add them if the user hasn't specifically asked so. class LoopVectorizeHints { enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED, - HK_PREDICATE }; + HK_PREDICATE, HK_IVDEP }; /// Hint - associates name and validation with the hint value. struct Hint { @@ -73,6 +73,9 @@ /// Vector Predicate Hint Predicate; + /// Ignore Vector dependencies + Hint Ivdep; + /// Return the loop metadata prefix. static StringRef Prefix() { return "llvm.loop."; } @@ -102,6 +105,7 @@ unsigned getInterleave() const { return Interleave.Value; } unsigned getIsVectorized() const { return IsVectorized.Value; } unsigned getPredicate() const { return Predicate.Value; } + unsigned getIvdep() const { return Ivdep.Value; } enum ForceKind getForce() const { if ((ForceKind)Force.Value == FK_Undefined && hasDisableAllTransformsHint(TheLoop)) @@ -199,7 +203,7 @@ LoopVectorizationLegality( Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AliasAnalysis *AA, - Function *F, std::function *GetLAA, + Function *F, std::function *GetLAA, LoopInfo *LI, OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC) @@ -405,7 +409,7 @@ DominatorTree *DT; // LoopAccess analysis. - std::function *GetLAA; + std::function *GetLAA; // And the loop-accesses info corresponding to this loop. This pointer is // null until canVectorizeMemory sets it up. Index: llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h =================================================================== --- llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h +++ llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h @@ -138,7 +138,7 @@ DemandedBits *DB; AliasAnalysis *AA; AssumptionCache *AC; - std::function *GetLAA; + std::function *GetLAA; OptimizationRemarkEmitter *ORE; ProfileSummaryInfo *PSI; @@ -149,7 +149,7 @@ TargetTransformInfo &TTI_, DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_, DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_, - std::function &GetLAA_, + std::function &GetLAA_, OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_); bool processLoop(Loop *L); Index: llvm/lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1633,10 +1633,12 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, - const ValueToValueMap &Strides) { + const ValueToValueMap &Strides, + bool UnknownDepHint) { MaxSafeDepDistBytes = -1; SmallPtrSet Visited; + Status = VectorizationSafetyStatus::Safe; for (MemAccessInfo CurAccess : CheckDeps) { if (Visited.count(CurAccess)) continue; @@ -1678,7 +1680,13 @@ Dependence::DepType Type = isDependent(*A.first, A.second, *B.first, B.second, Strides); - mergeInStatus(Dependence::isSafeForVectorization(Type)); + // Update safety status depending on whether the Dependence type + // is safe. If Unknown Dependence type is to be considered safe, + // do not update safety status. + if (!UnknownDepHint || + !(Dependence::isSafeForVectorization(Type) == + VectorizationSafetyStatus::PossiblySafeWithRtChecks)) + mergeInStatus(Dependence::isSafeForVectorization(Type)); // Gather dependences unless we accumulated MaxDependences // dependences. In that case return as soon as we find the first @@ -1788,7 +1796,8 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, const TargetLibraryInfo *TLI, - DominatorTree *DT) { + DominatorTree *DT, + bool UnknownDepHint) { typedef SmallPtrSet ValueSet; // Holds the Load and Store instructions. @@ -2022,7 +2031,8 @@ if (Accesses.isDependencyCheckNeeded()) { LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n"); CanVecMem = DepChecker->areDepsSafe( - DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides); + DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides, + UnknownDepHint); MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes(); if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) { @@ -2343,7 +2353,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, - DominatorTree *DT, LoopInfo *LI) + DominatorTree *DT, LoopInfo *LI, + bool UnknownDepHint) : PSE(std::make_unique(*SE, *L)), PtrRtChecking(std::make_unique(SE)), DepChecker(std::make_unique(*PSE, L)), TheLoop(L), @@ -2351,7 +2362,7 @@ HasConvergentOp(false), HasDependenceInvolvingLoopInvariantAddress(false) { if (canAnalyzeLoop()) - analyzeLoop(AA, LI, TLI, DT); + analyzeLoop(AA, LI, TLI, DT, UnknownDepHint); } void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { @@ -2397,11 +2408,13 @@ PSE->print(OS, Depth); } -const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) { +const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L, + bool UnknownDepHint) { auto &LAI = LoopAccessInfoMap[L]; if (!LAI) - LAI = std::make_unique(L, SE, TLI, AA, DT, LI); + LAI = std::make_unique(L, SE, TLI, AA, DT, LI, + UnknownDepHint); return *LAI.get(); } Index: llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -61,6 +61,8 @@ case HK_ISVECTORIZED: case HK_PREDICATE: return (Val == 0 || Val == 1); + case HK_IVDEP: + return (Val == 1); } return false; } @@ -72,7 +74,8 @@ Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), Force("vectorize.enable", FK_Undefined, HK_FORCE), IsVectorized("isvectorized", 0, HK_ISVECTORIZED), - Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L), + Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), + Ivdep("vectorize.ivdep.enable", 0, HK_IVDEP), TheLoop(L), ORE(ORE) { // Populate values with existing loop metadata. getHintsFromMetadata(); @@ -224,7 +227,8 @@ return; unsigned Val = C->getZExtValue(); - Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate}; + Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate, + &Ivdep}; for (auto H : Hints) { if (Name == H->Name) { if (H->validate(Val)) @@ -825,7 +829,7 @@ } bool LoopVectorizationLegality::canVectorizeMemory() { - LAI = &(*GetLAA)(*TheLoop); + LAI = &(*GetLAA)(*TheLoop, Hints->getIvdep()); const OptimizationRemarkAnalysis *LAR = LAI->getReport(); if (LAR) { ORE->emit([&]() { Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1610,8 +1610,10 @@ auto *ORE = &getAnalysis().getORE(); auto *PSI = &getAnalysis().getPSI(); - std::function GetLAA = - [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); }; + std::function GetLAA = + [&](Loop &L, bool UnknownDepHint) -> const LoopAccessInfo & + { return LAA-> + getInfo(&L, UnknownDepHint); }; return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC, GetLAA, *ORE, PSI); @@ -7800,7 +7802,7 @@ Function &F, ScalarEvolution &SE_, LoopInfo &LI_, TargetTransformInfo &TTI_, DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_, DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_, - std::function &GetLAA_, + std::function &GetLAA_, OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_) { SE = &SE_; LI = &LI_; @@ -7879,8 +7881,8 @@ : nullptr; auto &LAM = AM.getResult(F).getManager(); - std::function GetLAA = - [&](Loop &L) -> const LoopAccessInfo & { + std::function GetLAA = + [&](Loop &L, bool UnknownDepHint) -> const LoopAccessInfo & { LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA}; return LAM.getResult(L, AR); }; Index: llvm/test/Transforms/LoopVectorize/X86/ivdep-alias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/X86/ivdep-alias.ll @@ -0,0 +1,74 @@ +; RUN: opt < %s -O3 -S | FileCheck %s +; IR generated for a function containing the loop: +; #pragma clang loop ivdep(enable) +; for (int i=0; i