Index: include/llvm/Analysis/LoopAccessAnalysis.h =================================================================== --- include/llvm/Analysis/LoopAccessAnalysis.h +++ include/llvm/Analysis/LoopAccessAnalysis.h @@ -714,9 +714,10 @@ bool Assume = false); /// \brief Returns true if the memory operations \p A and \p B are consecutive. -/// This is a simple API that does not depend on the analysis pass. +/// This is a simple API that does not depend on the analysis pass. bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, - ScalarEvolution &SE, bool CheckType = true); + ScalarEvolution &SE, DominatorTree *DT, + bool CheckType = true); /// \brief This analysis provides dependence information for the memory accesses /// of a loop. Index: lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- lib/Analysis/LoopAccessAnalysis.cpp +++ lib/Analysis/LoopAccessAnalysis.cpp @@ -996,7 +996,8 @@ /// Returns true if the memory operations \p A and \p B are consecutive. bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, - ScalarEvolution &SE, bool CheckType) { + ScalarEvolution &SE, DominatorTree *DT, + bool CheckType) { Value *PtrA = getPointerOperand(A); Value *PtrB = getPointerOperand(B); unsigned ASA = getAddressSpaceOperand(A); @@ -1012,7 +1013,7 @@ // Make sure that A and B have the same type if required. if(CheckType && PtrA->getType() != PtrB->getType()) - return false; + return false; unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA); Type *Ty = cast(PtrA->getType())->getElementType(); @@ -1043,7 +1044,76 @@ const SCEV *PtrSCEVA = SE.getSCEV(PtrA); const SCEV *PtrSCEVB = SE.getSCEV(PtrB); const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta); - return X == PtrSCEVB; + if (X == PtrSCEVB) + return true; + + // Sometimes even this doesn't work, because SCEV can't always see through + // patterns that look like (gep (ext (add (shl X, C1), C2))). Try checking + // things the hard way. + + // Look through GEPs after checking they're the same except for the last + // index. + GetElementPtrInst *GEPA = dyn_cast(getPointerOperand(A)); + GetElementPtrInst *GEPB = dyn_cast(getPointerOperand(B)); + if (!GEPA || !GEPB || GEPA->getNumOperands() != GEPB->getNumOperands()) + return false; + unsigned FinalIndex = GEPA->getNumOperands() - 1; + for (unsigned i = 0; i < FinalIndex; i++) + if (GEPA->getOperand(i) != GEPB->getOperand(i)) + return false; + + Instruction *OpA = dyn_cast(GEPA->getOperand(FinalIndex)); + Instruction *OpB = dyn_cast(GEPB->getOperand(FinalIndex)); + if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() || + OpA->getType() != OpB->getType()) + return false; + + // Only look through a ZExt/SExt. + if (!isa(OpA) && !isa(OpA)) + return false; + + bool Signed = isa(OpA); + + OpA = dyn_cast(OpA->getOperand(0)); + OpB = dyn_cast(OpB->getOperand(0)); + if (!OpA || !OpB || OpA->getType() != OpB->getType()) + return false; + + // Now we need to prove that adding 1 to OpA won't overflow. + bool Safe = false; + // First attempt: if OpB is an add with NSW/NUW, and OpB is 1 added to OpA, + // we're okay. + if (OpB->getOpcode() == Instruction::Add && + isa(OpB->getOperand(1)) && + cast(OpB->getOperand(1))->getSExtValue() > 0) { + if (Signed) + Safe = cast(OpB)->hasNoSignedWrap(); + else + Safe = cast(OpB)->hasNoUnsignedWrap(); + } + + unsigned BitWidth = OpA->getType()->getScalarSizeInBits(); + + // Second attempt: + // If any bits are known to be zero other than the sign bit in OpA, we can + // add 1 to it while guaranteeing no overflow of any sort. + if (!Safe && DT) { + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + computeKnownBits(OpA, KnownZero, KnownOne, DL, 0, nullptr, OpA, DT); + KnownZero &= ~APInt::getHighBitsSet(BitWidth, 1); + if (KnownZero != 0) + Safe = true; + } + + if (!Safe) + return false; + + OffsetSCEVA = SE.getSCEV(OpA); + OffsetSCEVB = SE.getSCEV(OpB); + const SCEV *One = SE.getConstant(APInt(BitWidth, 1)); + const SCEV *X2 = SE.getAddExpr(OffsetSCEVA, One); + return X2 == OffsetSCEVB; } bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) { Index: lib/Transforms/Scalar/LoopIdiomRecognize.cpp =================================================================== --- lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -593,7 +593,7 @@ assert((SecondSplatValue || SecondPatternValue) && "Expected either splat value or pattern value."); - if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) { + if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, nullptr, false)) { if (ForMemset) { if (FirstSplatValue != SecondSplatValue) continue; Index: lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/SLPVectorizer.cpp +++ lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1173,7 +1173,7 @@ bool Consecutive = true; bool ReverseConsecutive = true; for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) { - if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) { + if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE, DT)) { Consecutive = false; break; } else { @@ -1192,7 +1192,7 @@ // check the reverse order. if (ReverseConsecutive) for (unsigned i = VL.size() - 1; i > 0; --i) - if (!isConsecutiveAccess(VL[i], VL[i - 1], *DL, *SE)) { + if (!isConsecutiveAccess(VL[i], VL[i - 1], *DL, *SE, DT)) { ReverseConsecutive = false; break; } @@ -1365,7 +1365,7 @@ case Instruction::Store: { // Check if the stores are consecutive or of we need to swizzle them. for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) - if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) { + if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE, DT)) { BS.cancelScheduling(VL); newTreeEntry(VL, false); DEBUG(dbgs() << "SLP: Non-consecutive store.\n"); @@ -1967,11 +1967,11 @@ if (LoadInst *L1 = dyn_cast(Right[j + 1])) { Instruction *VL1 = cast(VL[j]); Instruction *VL2 = cast(VL[j + 1]); - if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) { + if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE, DT)) { std::swap(Left[j], Right[j]); continue; } else if (VL2->isCommutative() && - isConsecutiveAccess(L, L1, *DL, *SE)) { + isConsecutiveAccess(L, L1, *DL, *SE, DT)) { std::swap(Left[j + 1], Right[j + 1]); continue; } @@ -1982,11 +1982,11 @@ if (LoadInst *L1 = dyn_cast(Left[j + 1])) { Instruction *VL1 = cast(VL[j]); Instruction *VL2 = cast(VL[j + 1]); - if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) { + if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE, DT)) { std::swap(Left[j], Right[j]); continue; } else if (VL2->isCommutative() && - isConsecutiveAccess(L, L1, *DL, *SE)) { + isConsecutiveAccess(L, L1, *DL, *SE, DT)) { std::swap(Left[j + 1], Right[j + 1]); continue; } @@ -2134,7 +2134,7 @@ for (unsigned j = 0; j < VL.size() - 1; ++j) { if (LoadInst *L = dyn_cast(Left[j])) { if (LoadInst *L1 = dyn_cast(Right[j + 1])) { - if (isConsecutiveAccess(L, L1, *DL, *SE)) { + if (isConsecutiveAccess(L, L1, *DL, *SE, DT)) { std::swap(Left[j + 1], Right[j + 1]); continue; } @@ -2142,7 +2142,7 @@ } if (LoadInst *L = dyn_cast(Right[j])) { if (LoadInst *L1 = dyn_cast(Left[j + 1])) { - if (isConsecutiveAccess(L, L1, *DL, *SE)) { + if (isConsecutiveAccess(L, L1, *DL, *SE, DT)) { std::swap(Left[j + 1], Right[j + 1]); continue; } @@ -3738,7 +3738,7 @@ IndexQueue.push_back(j - 1); for (auto &k : IndexQueue) { - if (isConsecutiveAccess(Stores[i], Stores[k], *DL, *SE)) { + if (isConsecutiveAccess(Stores[i], Stores[k], *DL, *SE, DT)) { Tails.insert(Stores[k]); Heads.insert(Stores[i]); ConsecutiveChain[Stores[i]] = Stores[k]; Index: test/Transforms/SLPVectorizer/X86/consecutive-access.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/consecutive-access.ll +++ test/Transforms/SLPVectorizer/X86/consecutive-access.ll @@ -8,11 +8,10 @@ @D = common global [2000 x float] zeroinitializer, align 16 ; Currently SCEV isn't smart enough to figure out that accesses -; A[3*i], A[3*i+1] and A[3*i+2] are consecutive, but in future -; that would hopefully be fixed. For now, check that this isn't -; vectorized. +; A[3*i], A[3*i+1] and A[3*i+2] are consecutive, but llvm::isConsecutiveAccess +; can catch this. Check that this is now vectorized. ; CHECK-LABEL: foo_3double -; CHECK-NOT: x double> +; CHECK: x double> ; Function Attrs: nounwind ssp uwtable define void @foo_3double(i32 %u) #0 { entry: