Index: include/llvm/Analysis/LoopAccessAnalysis.h =================================================================== --- include/llvm/Analysis/LoopAccessAnalysis.h +++ include/llvm/Analysis/LoopAccessAnalysis.h @@ -715,10 +715,13 @@ const ValueToValueMap &StridesMap = ValueToValueMap(), bool Assume = false); -/// \brief Returns true if the memory operations \p A and \p B are consecutive. -/// This is a simple API that does not depend on the analysis pass. -bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, - ScalarEvolution &SE, bool CheckType = true); +/// \brief Returns 1 if the memory operations \p A and \p B are consecutive and +/// B follows A. Returns -1 if the memory operations \p A and \p B are +/// consecutive and A follows B. Otherwise, returns 0 to indicate \p A and \p B +/// are not known to be consecutive. +/// This is a simple API that does not depend on the analysis pass. +int isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, + ScalarEvolution &SE, bool CheckType = true); /// \brief This analysis provides dependence information for the memory accesses /// of a loop. Index: lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- lib/Analysis/LoopAccessAnalysis.cpp +++ lib/Analysis/LoopAccessAnalysis.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPassManager.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" @@ -29,6 +30,8 @@ #define DEBUG_TYPE "loop-accesses" +STATISTIC(ConsecutiveAccessQuery, "Number of isConsecutiveAccess queries."); + static cl::opt VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), @@ -1012,9 +1015,13 @@ return -1; } -/// Returns true if the memory operations \p A and \p B are consecutive. -bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, - ScalarEvolution &SE, bool CheckType) { +/// Returns 1 if the memory operations \p A and \p B are consecutive and B +/// follows A. Returns -1 if the memory operations \p A and \p B are consecutive +/// and A follows B. Otherwise, returns 0 to indicate \p A and \p B are not +/// known to be consecutive. +int llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, + ScalarEvolution &SE, bool CheckType) { + ++ConsecutiveAccessQuery; Value *PtrA = getPointerOperand(A); Value *PtrB = getPointerOperand(B); unsigned ASA = getAddressSpaceOperand(A); @@ -1022,15 +1029,15 @@ // Check that the address spaces match and that the pointers are valid. if (!PtrA || !PtrB || (ASA != ASB)) - return false; + return 0; // Make sure that A and B are different pointers. if (PtrA == PtrB) - return false; + return 0; // Make sure that A and B have the same type if required. if (CheckType && PtrA->getType() != PtrB->getType()) - return false; + return 0; unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA); Type *Ty = cast(PtrA->getType())->getElementType(); @@ -1040,16 +1047,23 @@ PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); - // OffsetDelta = OffsetB - OffsetA; + // OffsetDelta = OffsetB - OffsetA; const SCEV *OffsetSCEVA = SE.getConstant(OffsetA); const SCEV *OffsetSCEVB = SE.getConstant(OffsetB); const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA); - const SCEVConstant *OffsetDeltaC = dyn_cast(OffsetDeltaSCEV); - const APInt &OffsetDelta = OffsetDeltaC->getAPInt(); // Check if they are based on the same pointer. That makes the offsets // sufficient. - if (PtrA == PtrB) - return OffsetDelta == Size; + if (PtrA == PtrB) { + const APInt &OffsetDelta = cast(OffsetDeltaSCEV)->getAPInt(); + // Consecutive: B follows A. + if (OffsetDelta == Size) + return 1; + // Reverse consecutive: A follows B. + if (-OffsetDelta == Size) + return -1; + // Non-consecutive. + return 0; + } // Compute the necessary base pointer delta to have the necessary final delta // equal to the size. @@ -1061,7 +1075,19 @@ const SCEV *PtrSCEVA = SE.getSCEV(PtrA); const SCEV *PtrSCEVB = SE.getSCEV(PtrB); const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta); - return X == PtrSCEVB; + // Consecutive: B follows A. + if (X == PtrSCEVB) + return 1; + + OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA); + BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV); + X = SE.getAddExpr(PtrSCEVB, BaseDelta); + // Reverse consecutive: A follows B. + if (X == PtrSCEVA) + return -1; + + // Non-consecutive. + return 0; } bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) { Index: lib/Transforms/Scalar/LoopIdiomRecognize.cpp =================================================================== --- lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -593,7 +593,7 @@ assert((SecondSplatValue || SecondPatternValue) && "Expected either splat value or pattern value."); - if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) { + if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false) > 0) { if (ForMemset) { if (FirstSplatValue != SecondSplatValue) continue; Index: lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/SLPVectorizer.cpp +++ lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1180,12 +1180,21 @@ bool Consecutive = true; bool ReverseConsecutive = true; for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) { - if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) { + int CA = isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE); + bool IsConsecutive = CA > 0; + bool IsReverseConsecutive = CA < 0; + + if (IsConsecutive) + ReverseConsecutive = false; + else if (IsReverseConsecutive) + Consecutive = false; + else { Consecutive = false; - break; - } else { ReverseConsecutive = false; + break; } + if (!Consecutive && !ReverseConsecutive) + break; } if (Consecutive) { @@ -1370,9 +1379,9 @@ return; } case Instruction::Store: { - // Check if the stores are consecutive or of we need to swizzle them. + // Check if the stores are consecutive or if we need to swizzle them. for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) - if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) { + if (isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE) <= 0) { BS.cancelScheduling(VL); newTreeEntry(VL, false); DEBUG(dbgs() << "SLP: Non-consecutive store.\n"); @@ -1987,7 +1996,7 @@ if (LoadInst *L1 = dyn_cast(Right[j + 1])) { Instruction *VL1 = cast(VL[j]); Instruction *VL2 = cast(VL[j + 1]); - if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) { + if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE) > 0) { std::swap(Left[j], Right[j]); continue; } else if (VL2->isCommutative() && @@ -2002,7 +2011,7 @@ if (LoadInst *L1 = dyn_cast(Left[j + 1])) { Instruction *VL1 = cast(VL[j]); Instruction *VL2 = cast(VL[j + 1]); - if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) { + if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE) > 0) { std::swap(Left[j], Right[j]); continue; } else if (VL2->isCommutative() && @@ -2154,7 +2163,7 @@ for (unsigned j = 0; j < VL.size() - 1; ++j) { if (LoadInst *L = dyn_cast(Left[j])) { if (LoadInst *L1 = dyn_cast(Right[j + 1])) { - if (isConsecutiveAccess(L, L1, *DL, *SE)) { + if (isConsecutiveAccess(L, L1, *DL, *SE) > 0) { std::swap(Left[j + 1], Right[j + 1]); continue; } @@ -2162,7 +2171,7 @@ } if (LoadInst *L = dyn_cast(Right[j])) { if (LoadInst *L1 = dyn_cast(Left[j + 1])) { - if (isConsecutiveAccess(L, L1, *DL, *SE)) { + if (isConsecutiveAccess(L, L1, *DL, *SE) > 0) { std::swap(Left[j + 1], Right[j + 1]); continue; } @@ -3762,7 +3771,7 @@ IndexQueue.push_back(j - 1); for (auto &k : IndexQueue) { - if (isConsecutiveAccess(Stores[i], Stores[k], *DL, *SE)) { + if (isConsecutiveAccess(Stores[i], Stores[k], *DL, *SE) > 0) { Tails.insert(Stores[k]); Heads.insert(Stores[i]); ConsecutiveChain[Stores[i]] = Stores[k];