diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -244,7 +244,7 @@ SmallVector getInstructionsForAccess(Value *Ptr, bool isWrite) const; -private: + // private: /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and /// applies dynamic knowledge to simplify SCEV expressions and convert them /// to a more usable form. We need this in case assumptions about SCEV @@ -392,7 +392,8 @@ AliasSetId(AliasSetId), Expr(Expr) {} }; - RuntimePointerChecking(ScalarEvolution *SE) : SE(SE) {} + RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE) + : DC(DC), SE(SE) {} /// Reset the state of the pointer runtime information. void reset() { @@ -423,6 +424,12 @@ return Checks; } + Optional>> getDiffChecks() const { + if (!CanUseDiffCheck) + return None; + return {DiffChecks}; + } + /// Decide if we need to add a check between two groups of pointers, /// according to needsChecking. bool needsChecking(const RuntimeCheckingPtrGroup &M, @@ -477,7 +484,9 @@ bool UseDependencies); /// Generate the checks and return them. - SmallVector generateChecks() const; + SmallVector generateChecks(); + + MemoryDepChecker &DC; /// Holds a pointer to the ScalarEvolution analysis. ScalarEvolution *SE; @@ -485,6 +494,12 @@ /// Set of run-time checks required to establish independence of /// otherwise may-aliasing pointers in the loop. SmallVector Checks; + + bool CanUseDiffCheck = true; + SmallVector> DiffChecks; + + void tryToCreateDiffCheck(const RuntimeCheckingPtrGroup &CGI, + const RuntimeCheckingPtrGroup &CGJ); }; /// Drive the analysis of memory accesses in the loop diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -497,6 +497,11 @@ const SmallVectorImpl &PointerChecks, SCEVExpander &Expander); +Value *addDiffRuntimeChecks(Instruction *Loc, Loop *TheLoop, + ArrayRef> Checks, + SCEVExpander &Expander, ElementCount VF, + unsigned IC); + /// Struct to hold information about a partially invariant condition. struct IVConditionInfo { /// Instructions that need to be duplicated and checked for the unswitching diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -234,8 +234,74 @@ Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc); } -SmallVector -RuntimePointerChecking::generateChecks() const { +void RuntimePointerChecking::tryToCreateDiffCheck( + const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) { + if (!CanUseDiffCheck) + return; + if (CGI.Members.size() != 1 || CGJ.Members.size() != 1) { + CanUseDiffCheck = false; + return; + } + + auto &PtrI = Pointers[CGI.Members[0]]; + auto &PtrJ = Pointers[CGJ.Members[0]]; + auto &AccI = DC.Accesses[MemoryDepChecker::MemAccessInfo(PtrI.PointerValue, + PtrI.IsWritePtr)]; + auto &AccJ = DC.Accesses[MemoryDepChecker::MemAccessInfo(PtrJ.PointerValue, + PtrJ.IsWritePtr)]; + + if (DC.Accesses.find(MemoryDepChecker::MemAccessInfo( + PtrI.PointerValue, !PtrI.IsWritePtr)) != DC.Accesses.end() || + DC.Accesses.find(MemoryDepChecker::MemAccessInfo( + PtrJ.PointerValue, !PtrJ.IsWritePtr)) != DC.Accesses.end()) { + + CanUseDiffCheck = false; + return; + } + if (AccI.size() != 1 || AccJ.size() != 1) { + CanUseDiffCheck = false; + return; + } + auto *Src = &PtrI; + auto *Sink = &PtrJ; + if (AccJ[0] < AccI[0]) + std::swap(Src, Sink); + + auto *SrcAR = dyn_cast(Src->Expr); + auto *SinkAR = dyn_cast(Sink->Expr); + if (!SrcAR || !SinkAR) { + CanUseDiffCheck = false; + return; + } + + const DataLayout &DL = + SinkAR->getLoop()->getHeader()->getModule()->getDataLayout(); + unsigned AllocSize = DL.getTypeAllocSize( + PtrI.PointerValue->getType()->getPointerElementType()); + auto *IntTy = IntegerType::get(Src->PointerValue->getContext(), + DL.getPointerSizeInBits(CGI.AddressSpace)); + auto *Step = dyn_cast(SinkAR->getStepRecurrence(*SE)); + if (!Step || Step != SrcAR->getStepRecurrence(*SE) || + Step->getAPInt().abs() != AllocSize) { + CanUseDiffCheck = false; + return; + } + + if (Step->getValue()->isNegative()) + std::swap(SinkAR, SrcAR); + + auto *SinkStartInt = SE->getPtrToIntExpr(SinkAR->getStart(), IntTy); + auto *SrcStartInt = SE->getPtrToIntExpr(SrcAR->getStart(), IntTy); + if (isa(SinkStartInt) || + isa(SrcStartInt)) { + CanUseDiffCheck = false; + return; + } + auto *Diff = SE->getMinusSCEV(SinkStartInt, SrcStartInt); + DiffChecks.emplace_back(Diff, AllocSize); +} + +SmallVector RuntimePointerChecking::generateChecks() { SmallVector Checks; for (unsigned I = 0; I < CheckingGroups.size(); ++I) { @@ -243,8 +309,10 @@ const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I]; const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J]; - if (needsChecking(CGI, CGJ)) + if (needsChecking(CGI, CGJ)) { + tryToCreateDiffCheck(CGI, CGJ); Checks.push_back(std::make_pair(&CGI, &CGJ)); + } } } return Checks; @@ -2290,10 +2358,12 @@ const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI) : PSE(std::make_unique(*SE, *L)), - PtrRtChecking(std::make_unique(SE)), + PtrRtChecking(nullptr), DepChecker(std::make_unique(*PSE, L)), TheLoop(L) { - if (canAnalyzeLoop()) + if (canAnalyzeLoop()) { + PtrRtChecking = std::make_unique(*DepChecker, SE); analyzeLoop(AA, LI, TLI, DT); + } } void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1614,6 +1614,37 @@ return MemoryRuntimeCheck; } +Value * +llvm::addDiffRuntimeChecks(Instruction *Loc, Loop *TheLoop, + ArrayRef> Checks, + SCEVExpander &Expander, ElementCount VF, + unsigned IC) { + + LLVMContext &Ctx = Loc->getContext(); + IRBuilder ChkBuilder(Ctx, + Loc->getModule()->getDataLayout()); + ChkBuilder.SetInsertPoint(Loc); + // Our instructions might fold to a constant. + Value *MemoryRuntimeCheck = nullptr; + + for (auto &C : Checks) { + Type *Ty = C.first->getType(); + auto *VFTimesUFTimesSize = + ConstantInt::get(Ty, VF.getFixedValue() * IC * C.second); + auto *Diff = Expander.expandCodeFor(C.first, Ty, Loc); + Value *IsConflict = + ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check"); + + if (MemoryRuntimeCheck) { + IsConflict = + ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx"); + } + MemoryRuntimeCheck = IsConflict; + } + + return MemoryRuntimeCheck; +} + Optional llvm::hasPartialIVCondition(Loop &L, unsigned MSSAThreshold, MemorySSA &MSSA, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2017,7 +2017,8 @@ /// there is no vector code generation, the check blocks are removed /// completely. void Create(Loop *L, const LoopAccessInfo &LAI, - const SCEVUnionPredicate &UnionPred) { + const SCEVUnionPredicate &UnionPred, ElementCount VF, + unsigned IC) { BasicBlock *LoopHeader = L->getHeader(); BasicBlock *Preheader = L->getLoopPreheader(); @@ -2040,9 +2041,17 @@ MemCheckBlock = SplitBlock(Pred, Pred->getTerminator(), DT, LI, nullptr, "vector.memcheck"); - MemRuntimeCheckCond = - addRuntimeChecks(MemCheckBlock->getTerminator(), L, - RtPtrChecking.getChecks(), MemCheckExp); + auto DiffChecks = RtPtrChecking.getDiffChecks(); + if (!VF.isScalable() && DiffChecks) { + MemRuntimeCheckCond = + addDiffRuntimeChecks(MemCheckBlock->getTerminator(), L, *DiffChecks, + MemCheckExp, VF, IC); + + } else { + MemRuntimeCheckCond = + addRuntimeChecks(MemCheckBlock->getTerminator(), L, + RtPtrChecking.getChecks(), MemCheckExp); + } assert(MemRuntimeCheckCond && "no RT checks generated although RtPtrChecking " "claimed checks are required"); @@ -3214,13 +3223,15 @@ AddedSafetyChecks = true; - // We currently don't use LoopVersioning for the actual loop cloning but we - // still use it to add the noalias metadata. + // if (!Legal->getLAI()->getRuntimePointerChecking()->getDiffChecks()) { + // We currently don't use LoopVersioning for the actual loop cloning but we + // still use it to add the noalias metadata. LVer = std::make_unique( *Legal->getLAI(), Legal->getLAI()->getRuntimePointerChecking()->getChecks(), OrigLoop, LI, DT, PSE.getSE()); LVer->prepareNoAliasMetadata(); + //} return MemCheckBlock; } @@ -10564,7 +10575,7 @@ GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, F->getParent()->getDataLayout()); if (!VF.Width.isScalar() || IC > 1) - Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate()); + Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate(), VF.Width, IC); using namespace ore; if (!VectorizeLoop) { diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll @@ -7,18 +7,16 @@ define void @arm_abs_q7(i8* nocapture readonly %pSrc, i8* nocapture %pDst, i32 %blockSize) #0 { ; CHECK-LABEL: @arm_abs_q7( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[PSRC2:%.*]] = ptrtoint i8* [[PSRC:%.*]] to i32 +; CHECK-NEXT: [[PDST1:%.*]] = ptrtoint i8* [[PDST:%.*]] to i32 ; CHECK-NEXT: [[CMP_NOT19:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP_NOT19]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK: while.body.preheader: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 16 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; CHECK: vector.memcheck: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[PDST:%.*]], i32 [[BLOCKSIZE]] -; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, i8* [[PSRC:%.*]], i32 [[BLOCKSIZE]] -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i8* [[SCEVGEP1]], [[PDST]] -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i8* [[SCEVGEP]], [[PSRC]] -; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[PDST1]], [[PSRC2]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP0]], 16 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -16 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* [[PSRC]], i32 [[N_VEC]] @@ -45,9 +43,9 @@ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ], [ [[PSRC]], [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ], [ [[BLOCKSIZE]], [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i8* [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ], [ [[PDST]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i8* [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[PSRC_ADDR_022:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -102,18 +100,16 @@ define void @arm_abs_q15(i16* nocapture readonly %pSrc, i16* nocapture %pDst, i32 %blockSize) #0 { ; CHECK-LABEL: @arm_abs_q15( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[PSRC2:%.*]] = ptrtoint i16* [[PSRC:%.*]] to i32 +; CHECK-NEXT: [[PDST1:%.*]] = ptrtoint i16* [[PDST:%.*]] to i32 ; CHECK-NEXT: [[CMP_NOT20:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP_NOT20]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK: while.body.preheader: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; CHECK: vector.memcheck: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[PDST:%.*]], i32 [[BLOCKSIZE]] -; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i16, i16* [[PSRC:%.*]], i32 [[BLOCKSIZE]] -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i16* [[SCEVGEP4]], [[PDST]] -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i16* [[SCEVGEP]], [[PSRC]] -; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[PDST1]], [[PSRC2]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP0]], 16 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -8 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i16, i16* [[PSRC]], i32 [[N_VEC]] @@ -140,9 +136,9 @@ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ], [ [[PSRC]], [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i32 [ [[IND_END7]], [[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ], [ [[BLOCKSIZE]], [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i16* [ [[IND_END9]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ], [ [[PDST]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i32 [ [[IND_END7]], [[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i16* [ [[IND_END9]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[PSRC_ADDR_023:%.*]] = phi i16* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -197,18 +193,16 @@ define void @arm_abs_q31(i32* nocapture readonly %pSrc, i32* nocapture %pDst, i32 %blockSize) #0 { ; CHECK-LABEL: @arm_abs_q31( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[PSRC2:%.*]] = ptrtoint i32* [[PSRC:%.*]] to i32 +; CHECK-NEXT: [[PDST1:%.*]] = ptrtoint i32* [[PDST:%.*]] to i32 ; CHECK-NEXT: [[CMP_NOT14:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP_NOT14]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK: while.body.preheader: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; CHECK: vector.memcheck: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[PDST:%.*]], i32 [[BLOCKSIZE]] -; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[PSRC:%.*]], i32 [[BLOCKSIZE]] -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[PDST]] -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[PSRC]] -; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[PDST1]], [[PSRC2]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP0]], 16 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -4 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[PSRC]], i32 [[N_VEC]] @@ -235,9 +229,9 @@ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ], [ [[PSRC]], [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i32 [ [[IND_END7]], [[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ], [ [[BLOCKSIZE]], [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32* [ [[IND_END9]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ], [ [[PDST]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i32 [ [[IND_END7]], [[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32* [ [[IND_END9]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[PSRC_ADDR_017:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll b/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll --- a/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll @@ -29,32 +29,19 @@ define void @Test(%struct.s* nocapture %obj, i64 %z) #0 { ; CHECK-LABEL: @Test( -; CHECK-NEXT: [[OBJ4:%.*]] = bitcast %struct.s* [[OBJ:%.*]] to i8* -; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.s* [[OBJ]], i64 0, i32 0, i64 [[Z:%.*]] -; CHECK-NEXT: [[SCEVGEP56:%.*]] = bitcast i32* [[SCEVGEP5]] to i8* ; CHECK-NEXT: br label [[DOTOUTER_PREHEADER:%.*]] ; CHECK: .outer.preheader: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[I_NEXT:%.*]], [[DOTOUTER:%.*]] ] -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 2, i64 [[I]], i64 0 -; CHECK-NEXT: [[SCEVGEP1:%.*]] = bitcast i32* [[SCEVGEP]] to i8* -; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 2, i64 [[I]], i64 [[Z]] -; CHECK-NEXT: [[SCEVGEP23:%.*]] = bitcast i32* [[SCEVGEP2]] to i8* -; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 1, i64 [[I]] -; CHECK-NEXT: [[SCEVGEP78:%.*]] = bitcast i32* [[SCEVGEP7]] to i8* -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[I]], 1 -; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 1, i64 [[TMP1]] -; CHECK-NEXT: [[SCEVGEP910:%.*]] = bitcast i32* [[SCEVGEP9]] to i8* -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 1, i64 [[I]] -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[Z]], 4 +; CHECK-NEXT: [[SHL1:%.*]] = shl nuw nsw i64 [[I]], 7 +; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[SHL1]], 256 +; CHECK-NEXT: [[ADD2:%.*]] = add i64 [[SHL1]], 128 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S:%.+]], %struct.s* [[OBJ:%.+]], i64 0, i32 1, i64 [[I]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[Z:%.+]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP56]] -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[OBJ4]], [[SCEVGEP23]] -; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: [[BOUND011:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP910]] -; CHECK-NEXT: [[BOUND112:%.*]] = icmp ult i8* [[SCEVGEP78]], [[SCEVGEP23]] -; CHECK-NEXT: [[FOUND_CONFLICT13:%.*]] = and i1 [[BOUND011]], [[BOUND112]] -; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT13]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[ADD1]], 16 +; CHECK-NEXT: [[DIFF_CHECK1:%.*]] = icmp ult i64 [[ADD2]], 16 +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK1]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[Z]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll --- a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll @@ -338,8 +338,8 @@ ; CHECK-LABEL: vector.body: ; CHECK: %wide.load = load <2 x i32>, <2 x i32>* -; CHECK: %wide.load16 = load <2 x i32>, <2 x i32>* -; CHECK: [[ADD:%[a-zA-Z0-9.]+]] = add nsw <2 x i32> %wide.load, %wide.load16 +; CHECK: %wide.load5 = load <2 x i32>, <2 x i32>* +; CHECK: [[ADD:%[a-zA-Z0-9.]+]] = add nsw <2 x i32> %wide.load, %wide.load5 ; CHECK: store <2 x i32> ; CHECK-LABEL: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll --- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll @@ -13,7 +13,9 @@ define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtable ssp { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0, [[DBG4:!dbg !.*]] +; CHECK-NEXT: [[B2:%.*]] = ptrtoint float* [[B:%.*]] to i64, [[DBG4:!dbg !.*]] +; CHECK-NEXT: [[A1:%.*]] = ptrtoint float* [[A:%.*]] to i64, [[DBG4]] +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0, [[DBG4]] ; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]], [[DBG4]] ; CHECK: for.body.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1, [[DBG9:!dbg !.*]] @@ -22,15 +24,9 @@ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3, [[DBG9]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]], [[DBG9]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1, [[DBG9]] -; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64, [[DBG9]] -; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1, [[DBG9]] -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[TMP5]], [[DBG9]] -; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[B:%.*]], i64 [[TMP5]], [[DBG9]] -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt float* [[SCEVGEP4]], [[A]], [[DBG9]] -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt float* [[SCEVGEP]], [[B]], [[DBG9]] -; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]], [[DBG9]] -; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]], [[DBG9]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[A1]], [[B2]], [[DBG9]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], 16, [[DBG9]] +; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]], [[DBG9]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588, [[DBG9]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]], [[DBG9]]