Index: lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- lib/Transforms/Scalar/IndVarSimplify.cpp +++ lib/Transforms/Scalar/IndVarSimplify.cpp @@ -31,6 +31,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -42,6 +43,7 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -2101,46 +2103,61 @@ return Phi != getLoopPhiForCounter(IncV, L); } -/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils -/// down to checking that all operands are constant and listing instructions -/// that may hide undef. -static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl &Visited, - unsigned Depth) { - if (isa(V)) - return !isa(V); - - if (Depth >= 6) - return false; - - // Conservatively handle non-constant non-instructions. For example, Arguments - // may be undef. - Instruction *I = dyn_cast(V); - if (!I) - return false; - - // Load and return values may be undef. - if(I->mayReadFromMemory() || isa(I) || isa(I)) - return false; +/// Return true if undefined behavior would provably be executed on all iterations +/// for which Root produced a poison result, and the loop exit condition for +/// ExitingBB is evaluated. Note that this doesn't say anything about whether +/// ExitingBB is actually executed or its exit taken. This can be used to assess +/// whether a new use of Root can be added before ExitingBB's terminator without +/// introducing UB which didn't previously exist. Note that a false result +/// conveys no information. 
+static bool programUndefinedIfFullPoisonAndExit(Instruction *Root, + BasicBlock *ExitingBB, + DominatorTree *DT) { + // Basic approach is to assume Root is poison, propagate poison forward + // through all users we can easily track, and then check whether any of those + // users are provably UB and must execute before our exiting block might + // exit. + + // The set of all recursive users we've visited (which are assumed to all be + // poison because of said visit) + SmallSet Visited; + SmallVector Worklist; + Worklist.push_back(Root); + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + dbgs() << "explore: " << *I << "\n"; + if (!Visited.insert(I).second) + continue; - // Optimistically handle other instructions. - for (Value *Op : I->operands()) { - if (!Visited.insert(Op).second) + // If we can't analyze propagation through this instruction, just skip it + // and transitive users. Safe as false is a conservative result. + if (!propagatesFullPoison(I) && !isa(I)) { + dbgs() << "no full: " << *I << "\n"; continue; - if (!hasConcreteDefImpl(Op, Visited, Depth+1)) - return false; + } + + dbgs() << "users: " << *I << "\n"; + for (User *User : I->users()) + Worklist.push_back(cast(User)); } - return true; -} -/// Return true if the given value is concrete. We must prove that undef can -/// never reach it. -/// -/// TODO: If we decide that this is a good approach to checking for undef, we -/// may factor it into a common location. -static bool hasConcreteDef(Value *V) { - SmallPtrSet Visited; - Visited.insert(V); - return hasConcreteDefImpl(V, Visited, 0); + for (Instruction *PoisonI : Visited) { + dbgs() << *PoisonI << " ub?\n"; + if (isa(PoisonI)) + return true; + if (PoisonI == ExitingBB->getTerminator()) + return true; + // If we know this must trigger UB on a path leading to our exit, we're + // done. 
+ auto *NotPoison = + dyn_cast_or_null(getGuaranteedNonFullPoisonOp(PoisonI)); + if (NotPoison != nullptr && Visited.count(NotPoison) && + DT->dominates(NotPoison, ExitingBB->getTerminator())) + return true; + } + // Might be non-UB, or might have a path we couldn't prove must execute on the + // way to the exiting block. + return false; } /// Return true if this IV has any uses other than the (soon to be rewritten) @@ -2189,7 +2206,7 @@ /// valid count without scaling the address stride, so it remains a pointer /// expression as far as SCEV is concerned. static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount, - ScalarEvolution *SE) { + ScalarEvolution *SE, DominatorTree *DT) { uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); Value *Cond = @@ -2204,15 +2221,20 @@ for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) { PHINode *Phi = cast(I); + dbgs() << *Phi << "\n"; if (!isLoopCounter(Phi, L, SE)) continue; + dbgs() << "counter \n"; + + // Avoid comparing an integer IV against a pointer Limit. if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy()) continue; const auto *AR = dyn_cast(SE->getSCEV(Phi)); + dbgs() << "add rec\n"; // AR may be a pointer type, while BECount is an integer type. // AR may be wider than BECount. With eq/ne tests overflow is immaterial. // AR may not be a narrower type, or we may never exit. @@ -2220,9 +2242,24 @@ if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth)) continue; + dbgs() << "bitwidth\n"; + + const SCEV *Init = AR->getStart(); + + // When switching IVs, we have to ensure that we don't introduce UB which + // didn't exist in the original program by introducing a use on an + // iteration where said IV produces poison. Our strategy here differs for + // pointer and integer IVs. For integers, we strip and reinfer as needed, + // see code in linearFunctionTestReplace. For pointers, there's no good + // way to reinfer inbounds, so we avoid selecting a pointer IV where we + // might need to. 
+ bool CanStripToEliminatePoison = isa(Init) && + Phi->getType()->isIntegerTy(); + // Avoid reusing a potentially undef value to compute other values that may // have originally had a concrete definition. - if (!hasConcreteDef(Phi)) { + if (!CanStripToEliminatePoison && + !programUndefinedIfFullPoisonAndExit(Phi, L->getExitingBlock(), DT)) { // We explicitly allow unknown phis as long as they are already used by // the loop test. In this case we assume that performing LFTR could not // increase the number of undef users. @@ -2233,7 +2270,7 @@ } } } - const SCEV *Init = AR->getStart(); + dbgs() << "concrete\n"; if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { // Don't force a live loop counter if another IV can be used. @@ -2359,8 +2396,10 @@ // If the exiting block is the same as the backedge block, we prefer to // compare against the post-incremented value, otherwise we must compare - // against the preincremented value. - if (L->getExitingBlock() == L->getLoopLatch()) { + // against the preincremented value. Note that we chose to only do this for + // integers as we can't recompute inbounds on a gep. + if (CmpIndVar->getType()->isIntegerTy() && + L->getExitingBlock() == L->getLoopLatch()) { // Add one to the "backedge-taken" count to get the trip count. // This addition may overflow, which is valid as long as the comparison is // truncated to BackedgeTakenCount->getType(). @@ -2648,7 +2687,7 @@ // using it. We can currently only handle loops with a single exit. if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L)) { - PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE); + PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT); if (IndVar) { // Check preconditions for proper SCEVExpander operation. SCEV does not // express SCEVExpander's dependencies, such as LoopSimplify. 
Instead any Index: test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll =================================================================== --- test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll +++ test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll @@ -34,7 +34,7 @@ ; CHECK-NEXT: br label [[IF_END_I126]] ; CHECK: if.end.i126: ; CHECK-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds i8, i8* [[DESTYPIXELPTR_010_I]], i32 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR_I]], null +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[DESTYPIXELPTR_010_I]], null ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY21_I]], label [[FOR_END_I129_LOOPEXIT:%.*]] ; CHECK: for.end.i129.loopexit: ; CHECK-NEXT: br label [[FOR_END_I129]] Index: test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll =================================================================== --- test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll +++ test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll @@ -17,12 +17,13 @@ ; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i8* null, [[END:%.*]] ; PTR64-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR64: preheader: +; PTR64-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[END]], i64 -1 ; PTR64-NEXT: br label [[LOOP:%.*]] ; PTR64: loop: ; PTR64-NEXT: [[P_01_US_US:%.*]] = phi i8* [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] ; PTR64-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR64-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]] -; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[END]] +; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[P_01_US_US]], [[SCEVGEP]] ; PTR64-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR64: exit.loopexit: ; PTR64-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -37,12 +38,13 @@ ; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i8* null, [[END:%.*]] ; PTR32-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR32: preheader: +; 
PTR32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[END]], i32 -1 ; PTR32-NEXT: br label [[LOOP:%.*]] ; PTR32: loop: ; PTR32-NEXT: [[P_01_US_US:%.*]] = phi i8* [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] ; PTR32-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR32-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]] -; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[END]] +; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[P_01_US_US]], [[SCEVGEP]] ; PTR32-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR32: exit.loopexit: ; PTR32-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -80,12 +82,13 @@ ; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i8* [[BUF:%.*]], [[END:%.*]] ; PTR64-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR64: preheader: +; PTR64-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[END]], i64 -1 ; PTR64-NEXT: br label [[LOOP:%.*]] ; PTR64: loop: ; PTR64-NEXT: [[P_01_US_US:%.*]] = phi i8* [ [[BUF]], [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] ; PTR64-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR64-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]] -; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[END]] +; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[P_01_US_US]], [[SCEVGEP]] ; PTR64-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR64: exit.loopexit: ; PTR64-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -100,12 +103,13 @@ ; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i8* [[BUF:%.*]], [[END:%.*]] ; PTR32-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR32: preheader: +; PTR32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[END]], i32 -1 ; PTR32-NEXT: br label [[LOOP:%.*]] ; PTR32: loop: ; PTR32-NEXT: [[P_01_US_US:%.*]] = phi i8* [ [[BUF]], [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] ; PTR32-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR32-NEXT: [[SNEXT:%.*]] = load 
i8, i8* [[GEP]] -; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[END]] +; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[P_01_US_US]], [[SCEVGEP]] ; PTR32-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR32: exit.loopexit: ; PTR32-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -146,14 +150,16 @@ ; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i32 0, [[CNT]] ; PTR64-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR64: preheader: -; PTR64-NEXT: [[TMP1:%.*]] = zext i32 [[CNT]] to i64 -; PTR64-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* null, i64 [[TMP1]] +; PTR64-NEXT: [[TMP1:%.*]] = add i32 [[EI]], -1 +; PTR64-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[BI]] +; PTR64-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; PTR64-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* null, i64 [[TMP3]] ; PTR64-NEXT: br label [[LOOP:%.*]] ; PTR64: loop: ; PTR64-NEXT: [[P_01_US_US:%.*]] = phi i8* [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] ; PTR64-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR64-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]] -; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[LFTR_LIMIT]] +; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[P_01_US_US]], [[LFTR_LIMIT]] ; PTR64-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR64: exit.loopexit: ; PTR64-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -171,13 +177,15 @@ ; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i32 0, [[CNT]] ; PTR32-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR32: preheader: -; PTR32-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* null, i32 [[CNT]] +; PTR32-NEXT: [[TMP1:%.*]] = add i32 [[EI]], -1 +; PTR32-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[BI]] +; PTR32-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* null, i32 [[TMP2]] ; PTR32-NEXT: br label [[LOOP:%.*]] ; PTR32: loop: ; PTR32-NEXT: [[P_01_US_US:%.*]] = phi i8* [ null, [[PREHEADER]] ], [ 
[[GEP:%.*]], [[LOOP]] ] ; PTR32-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR32-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]] -; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[LFTR_LIMIT]] +; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[P_01_US_US]], [[LFTR_LIMIT]] ; PTR32-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR32: exit.loopexit: ; PTR32-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -223,14 +231,17 @@ ; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i32 [[BI]], [[CNT]] ; PTR64-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR64: preheader: +; PTR64-NEXT: [[TMP1:%.*]] = add i32 [[EI]], -1 +; PTR64-NEXT: [[TMP2:%.*]] = shl i32 [[BI]], 1 +; PTR64-NEXT: [[TMP3:%.*]] = sub i32 [[TMP1]], [[TMP2]] +; PTR64-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +; PTR64-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* [[BUF]], i64 [[TMP4]] ; PTR64-NEXT: br label [[LOOP:%.*]] ; PTR64: loop: ; PTR64-NEXT: [[P_01_US_US:%.*]] = phi i8* [ [[BUF]], [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] -; PTR64-NEXT: [[IV:%.*]] = phi i32 [ [[BI]], [[PREHEADER]] ], [ [[IVNEXT:%.*]], [[LOOP]] ] ; PTR64-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR64-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]] -; PTR64-NEXT: [[IVNEXT]] = add nuw i32 [[IV]], 1 -; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IVNEXT]], [[CNT]] +; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[P_01_US_US]], [[LFTR_LIMIT]] ; PTR64-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR64: exit.loopexit: ; PTR64-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -248,14 +259,16 @@ ; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i32 [[BI]], [[CNT]] ; PTR32-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR32: preheader: +; PTR32-NEXT: [[TMP1:%.*]] = add i32 [[EI]], -1 +; PTR32-NEXT: [[TMP2:%.*]] = shl i32 [[BI]], 1 +; PTR32-NEXT: [[TMP3:%.*]] = sub i32 [[TMP1]], [[TMP2]] +; 
PTR32-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* [[BUF]], i32 [[TMP3]] ; PTR32-NEXT: br label [[LOOP:%.*]] ; PTR32: loop: ; PTR32-NEXT: [[P_01_US_US:%.*]] = phi i8* [ [[BUF]], [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] -; PTR32-NEXT: [[IV:%.*]] = phi i32 [ [[BI]], [[PREHEADER]] ], [ [[IVNEXT:%.*]], [[LOOP]] ] ; PTR32-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR32-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]] -; PTR32-NEXT: [[IVNEXT]] = add nuw i32 [[IV]], 1 -; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IVNEXT]], [[CNT]] +; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[P_01_US_US]], [[LFTR_LIMIT]] ; PTR32-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR32: exit.loopexit: ; PTR32-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -301,12 +314,12 @@ ; PTR64-NEXT: [[CMP1604192:%.*]] = icmp ult i8* undef, [[ADD_PTR1603]] ; PTR64-NEXT: br i1 [[CMP1604192]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END1609:%.*]] ; PTR64: for.body.preheader: -; PTR64-NEXT: [[SCEVGEP:%.*]] = getelementptr [512 x i8], [512 x i8]* [[BASE]], i64 1, i64 0 +; PTR64-NEXT: [[SCEVGEP:%.*]] = getelementptr [512 x i8], [512 x i8]* [[BASE]], i64 0, i64 511 ; PTR64-NEXT: br label [[FOR_BODY:%.*]] ; PTR64: for.body: ; PTR64-NEXT: [[R_17193:%.*]] = phi i8* [ [[INCDEC_PTR1608:%.*]], [[FOR_BODY]] ], [ null, [[FOR_BODY_PREHEADER]] ] ; PTR64-NEXT: [[INCDEC_PTR1608]] = getelementptr i8, i8* [[R_17193]], i64 1 -; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR1608]], [[SCEVGEP]] +; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[R_17193]], [[SCEVGEP]] ; PTR64-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END1609_LOOPEXIT:%.*]] ; PTR64: for.end1609.loopexit: ; PTR64-NEXT: br label [[FOR_END1609]] @@ -321,12 +334,12 @@ ; PTR32-NEXT: [[CMP1604192:%.*]] = icmp ult i8* undef, [[ADD_PTR1603]] ; PTR32-NEXT: br i1 [[CMP1604192]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END1609:%.*]] ; PTR32: for.body.preheader: -; 
PTR32-NEXT: [[SCEVGEP:%.*]] = getelementptr [512 x i8], [512 x i8]* [[BASE]], i32 1, i32 0 +; PTR32-NEXT: [[SCEVGEP:%.*]] = getelementptr [512 x i8], [512 x i8]* [[BASE]], i32 0, i32 511 ; PTR32-NEXT: br label [[FOR_BODY:%.*]] ; PTR32: for.body: ; PTR32-NEXT: [[R_17193:%.*]] = phi i8* [ [[INCDEC_PTR1608:%.*]], [[FOR_BODY]] ], [ null, [[FOR_BODY_PREHEADER]] ] ; PTR32-NEXT: [[INCDEC_PTR1608]] = getelementptr i8, i8* [[R_17193]], i64 1 -; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR1608]], [[SCEVGEP]] +; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[R_17193]], [[SCEVGEP]] ; PTR32-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END1609_LOOPEXIT:%.*]] ; PTR32: for.end1609.loopexit: ; PTR32-NEXT: br label [[FOR_END1609]] Index: test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll =================================================================== --- test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll +++ test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll @@ -11,7 +11,8 @@ ; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i8 addrspace(2)* [[BASE]], [[ADD_PTR]] ; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8 addrspace(2)* [[BASE]], i8 [[IDX_TRUNC]] +; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[IDX_TRUNC]], -1 +; CHECK-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8 addrspace(2)* [[BASE]], i8 [[TMP0]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[P_02:%.*]] = phi i8 addrspace(2)* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BASE]], [[FOR_BODY_PREHEADER]] ] @@ -20,7 +21,7 @@ ; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i8 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] ; CHECK-NEXT: store i8 [[SUB_PTR_SUB]], i8 addrspace(2)* [[P_02]] ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8 addrspace(2)* [[P_02]], i32 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8 addrspace(2)* [[INCDEC_PTR]], 
[[LFTR_LIMIT]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8 addrspace(2)* [[P_02]], [[LFTR_LIMIT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] @@ -58,7 +59,8 @@ ; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i8 addrspace(3)* [[BASE]], [[ADD_PTR]] ; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8 addrspace(3)* [[BASE]], i16 [[IDX_TRUNC]] +; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[IDX_TRUNC]], -1 +; CHECK-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8 addrspace(3)* [[BASE]], i16 [[TMP0]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[P_02:%.*]] = phi i8 addrspace(3)* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BASE]], [[FOR_BODY_PREHEADER]] ] @@ -68,7 +70,7 @@ ; CHECK-NEXT: [[CONV:%.*]] = trunc i16 [[SUB_PTR_SUB]] to i8 ; CHECK-NEXT: store i8 [[CONV]], i8 addrspace(3)* [[P_02]] ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8 addrspace(3)* [[P_02]], i32 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8 addrspace(3)* [[INCDEC_PTR]], [[LFTR_LIMIT]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8 addrspace(3)* [[P_02]], [[LFTR_LIMIT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] Index: test/Transforms/IndVarSimplify/lftr-reuse.ll =================================================================== --- test/Transforms/IndVarSimplify/lftr-reuse.ll +++ test/Transforms/IndVarSimplify/lftr-reuse.ll @@ -20,7 +20,8 @@ ; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i8* [[BASE]], [[ADD_PTR]] ; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[IDX_EXT]], -1 +; 
CHECK-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[TMP0]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[P_02:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BASE]], [[FOR_BODY_PREHEADER]] ] @@ -30,7 +31,7 @@ ; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[SUB_PTR_SUB]] to i8 ; CHECK-NEXT: store i8 [[CONV]], i8* [[P_02]] ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[P_02]], i32 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR]], [[LFTR_LIMIT]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[P_02]], [[LFTR_LIMIT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] @@ -222,9 +223,6 @@ ; Remove %i which is only used by the exit test. ; Verify that SCEV can still compute a backedge count from the sign ; extended %n, used for pointer comparison by LFTR. -; -; TODO: Fix for PR13371 currently makes this impossible. See -; IndVarSimplify.cpp hasConcreteDef(). We may want to change to undef rules. 
define void @geplftr(i8* %base, i32 %x, i32 %y, i32 %n) nounwind { ; CHECK-LABEL: @geplftr( ; CHECK-NEXT: entry: @@ -236,14 +234,15 @@ ; CHECK-NEXT: [[CMP_PH:%.*]] = icmp ult i32 [[X]], [[LIM]] ; CHECK-NEXT: br i1 [[CMP_PH]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* [[ADD_PTR10]], i64 [[TMP1]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INC:%.*]], [[LOOP]] ], [ [[X]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[APTR:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[LOOP]] ], [ [[ADD_PTR10]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[APTR]], i32 1 ; CHECK-NEXT: store i8 3, i8* [[APTR]] -; CHECK-NEXT: [[INC]] = add nuw i32 [[I]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], [[LIM]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[APTR]], [[LFTR_LIMIT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] @@ -303,13 +302,14 @@ ; CHECK-NEXT: br i1 [[CMP_PH]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64 -; CHECK-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* [[IVSTART]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -1 +; CHECK-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* [[IVSTART]], i64 [[TMP1]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[APTR:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[LOOP]] ], [ [[IVSTART]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[APTR]], i32 1 ; CHECK-NEXT: store i8 3, i8* [[APTR]] -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR]], [[LFTR_LIMIT]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[APTR]], [[LFTR_LIMIT]] ; 
CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] Index: test/Transforms/IndVarSimplify/lftr.ll =================================================================== --- test/Transforms/IndVarSimplify/lftr.ll +++ test/Transforms/IndVarSimplify/lftr.ll @@ -167,7 +167,7 @@ ; CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOT0]], align 1 ; CHECK-NEXT: [[TMP3]] = getelementptr inbounds i8, i8* [[P_0]], i64 1 ; CHECK-NEXT: store i8 [[TMP2]], i8* [[P_0]], align 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[TMP3]], getelementptr (i8, i8* getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 0), i64 240) +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[P_0]], getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 239) ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void