Index: llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -31,6 +31,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -42,6 +43,7 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -2077,6 +2079,48 @@ return Phi != getLoopPhiForCounter(IncV, L); } +/// Return true if undefined behavior would provable be executed on the path to +/// OnPathTo if Root produced a posion result. Note that this doesn't say +/// anything about whether OnPathTo is actually executed or whether Root is +/// actually poison. This can be used to assess whether a new use of Root can +/// be added at a location which is control equivalent with OnPathTo (such as +/// immediately before it) without introducing UB which didn't previously +/// exist. Note that a false result conveys no information. +static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, + Instruction *OnPathTo, + DominatorTree *DT) { + // Basic approach is to assume Root is poison, propagate poison forward + // through all users we can easily track, and then check whether any of those + // users are provable UB and must execute before out exiting block might + // exit. + + // The set of all recursive users we've visited (which are assumed to all be + // poison because of said visit) + SmallSet KnownPoison; + SmallVector Worklist; + Worklist.push_back(Root); + while (!Worklist.empty()) { + const Instruction *I = Worklist.pop_back_val(); + + // If we know this must trigger UB on a path leading our target. + if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo)) + return true; + + // If we can't analyze propagation through this instruction, just skip it + // and transitive users. Safe as false is a conservative result. + if (!propagatesFullPoison(I) && I != Root) + continue; + + if (KnownPoison.insert(I).second) + for (const User *User : I->users()) + Worklist.push_back(cast(User)); + } + + // Might be non-UB, or might have a path we couldn't prove must execute on + // way to exiting bb. + return false; +} + /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils /// down to checking that all operands are constant and listing instructions /// that may hide undef. @@ -2165,7 +2209,8 @@ /// valid count without scaling the address stride, so it remains a pointer /// expression as far as SCEV is concerned. static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB, - const SCEV *BECount, ScalarEvolution *SE) { + const SCEV *BECount, + ScalarEvolution *SE, DominatorTree *DT) { uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); Value *Cond = cast(ExitingBB->getTerminator())->getCondition(); @@ -2210,6 +2255,18 @@ continue; } + // Avoid introducing undefined behavior due to poison which didn't exist in + // the original program. (Annoyingly, the rules for poison and undef + // propagation are distinct, so this does NOT cover the undef case above.) + // We have to ensure that we don't introduce UB by introducing a use on an + // iteration where said IV produces poison. Our strategy here differs for + // pointers and integer IVs. For integers, we strip and reinfer as needed, + // see code in linearFunctionTestReplace. For pointers, we restrict + // transforms as there is no good way to reinfer inbounds once lost. + if (!Phi->getType()->isIntegerTy() && + !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) + continue; + const SCEV *Init = AR->getStart(); if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { @@ -2338,15 +2395,30 @@ // compare against the post-incremented value, otherwise we must compare // against the preincremented value. if (ExitingBB == L->getLoopLatch()) { - // Add one to the "backedge-taken" count to get the trip count. - // This addition may overflow, which is valid as long as the comparison is - // truncated to BackedgeTakenCount->getType(). - IVCount = SE->getAddExpr(BackedgeTakenCount, - SE->getOne(BackedgeTakenCount->getType())); - // The BackedgeTaken expression contains the number of times that the - // backedge branches to the loop header. This is one less than the - // number of times the loop executes, so use the incremented indvar. - CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBB); + bool SafeToPostInc = IndVar->getType()->isIntegerTy(); + if (!SafeToPostInc) { + // For pointer IVs, we chose to not strip inbounds which requires us not + // to add a potentially UB introducing use. We need to either a) show + // the loop test we're modifying is already in post-inc form, or b) show + // that adding a use must not introduce UB. + Instruction *Inc = + cast(IndVar->getIncomingValueForBlock(L->getLoopLatch())); + ICmpInst *LoopTest = getLoopTest(L, ExitingBB); + SafeToPostInc = LoopTest->getOperand(0) == Inc || + LoopTest->getOperand(1) == Inc || + mustExecuteUBIfPoisonOnPathTo(Inc, ExitingBB->getTerminator(), DT); + } + if (SafeToPostInc) { + // Add one to the "backedge-taken" count to get the trip count. + // This addition may overflow, which is valid as long as the comparison + // is truncated to BackedgeTakenCount->getType(). + IVCount = SE->getAddExpr(BackedgeTakenCount, + SE->getOne(BackedgeTakenCount->getType())); + // The BackedgeTaken expression contains the number of times that the + // backedge branches to the loop header. This is one less than the + // number of times the loop executes, so use the incremented indvar. + CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBB); + } } // It may be necessary to drop nowrap flags on the incrementing instruction @@ -2646,7 +2718,7 @@ if (BETakenCount->isZero()) continue; - PHINode *IndVar = FindLoopCounter(L, ExitingBB, BETakenCount, SE); + PHINode *IndVar = FindLoopCounter(L, ExitingBB, BETakenCount, SE, DT); if (!IndVar) continue; Index: llvm/trunk/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll @@ -28,13 +28,15 @@ ; CHECK-NEXT: br label [[FOR_BODY21_I:%.*]] ; CHECK: for.body21.i: ; CHECK-NEXT: [[DESTYPIXELPTR_010_I:%.*]] = phi i8* [ null, [[FOR_BODY21_LR_PH_I]] ], [ [[INCDEC_PTR_I:%.*]], [[IF_END_I126:%.*]] ] +; CHECK-NEXT: [[X_09_I:%.*]] = phi i32 [ 0, [[FOR_BODY21_LR_PH_I]] ], [ [[INC_I125:%.*]], [[IF_END_I126]] ] ; CHECK-NEXT: br i1 undef, label [[IF_END_I126]], label [[IF_ELSE_I124:%.*]] ; CHECK: if.else.i124: ; CHECK-NEXT: store i8 undef, i8* [[DESTYPIXELPTR_010_I]], align 1 ; CHECK-NEXT: br label [[IF_END_I126]] ; CHECK: if.end.i126: ; CHECK-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds i8, i8* [[DESTYPIXELPTR_010_I]], i32 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR_I]], null +; CHECK-NEXT: [[INC_I125]] = add nuw i32 [[X_09_I]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC_I125]], undef ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY21_I]], label [[FOR_END_I129_LOOPEXIT:%.*]] ; CHECK: for.end.i129.loopexit: ; CHECK-NEXT: br label [[FOR_END_I129]] Index: llvm/trunk/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll @@ -301,13 +301,12 @@ ; PTR64-NEXT: [[CMP1604192:%.*]] = icmp ult i8* undef, [[ADD_PTR1603]] ; PTR64-NEXT: br i1 [[CMP1604192]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END1609:%.*]] ; PTR64: for.body.preheader: -; PTR64-NEXT: [[SCEVGEP:%.*]] = getelementptr [512 x i8], [512 x i8]* [[BASE]], i64 1, i64 0 ; PTR64-NEXT: br label [[FOR_BODY:%.*]] ; PTR64: for.body: ; PTR64-NEXT: [[R_17193:%.*]] = phi i8* [ [[INCDEC_PTR1608:%.*]], [[FOR_BODY]] ], [ null, [[FOR_BODY_PREHEADER]] ] ; PTR64-NEXT: [[INCDEC_PTR1608]] = getelementptr i8, i8* [[R_17193]], i64 1 -; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR1608]], [[SCEVGEP]] -; PTR64-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END1609_LOOPEXIT:%.*]] +; PTR64-NEXT: [[CMP1604:%.*]] = icmp ult i8* [[INCDEC_PTR1608]], [[ADD_PTR1603]] +; PTR64-NEXT: br i1 [[CMP1604]], label [[FOR_BODY]], label [[FOR_END1609_LOOPEXIT:%.*]] ; PTR64: for.end1609.loopexit: ; PTR64-NEXT: br label [[FOR_END1609]] ; PTR64: for.end1609: @@ -321,13 +320,12 @@ ; PTR32-NEXT: [[CMP1604192:%.*]] = icmp ult i8* undef, [[ADD_PTR1603]] ; PTR32-NEXT: br i1 [[CMP1604192]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END1609:%.*]] ; PTR32: for.body.preheader: -; PTR32-NEXT: [[SCEVGEP:%.*]] = getelementptr [512 x i8], [512 x i8]* [[BASE]], i32 1, i32 0 ; PTR32-NEXT: br label [[FOR_BODY:%.*]] ; PTR32: for.body: ; PTR32-NEXT: [[R_17193:%.*]] = phi i8* [ [[INCDEC_PTR1608:%.*]], [[FOR_BODY]] ], [ null, [[FOR_BODY_PREHEADER]] ] ; PTR32-NEXT: [[INCDEC_PTR1608]] = getelementptr i8, i8* [[R_17193]], i64 1 -; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR1608]], [[SCEVGEP]] -; PTR32-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END1609_LOOPEXIT:%.*]] +; PTR32-NEXT: [[CMP1604:%.*]] = icmp ult i8* [[INCDEC_PTR1608]], [[ADD_PTR1603]] +; PTR32-NEXT: br i1 [[CMP1604]], label [[FOR_BODY]], label [[FOR_END1609_LOOPEXIT:%.*]] ; PTR32: for.end1609.loopexit: ; PTR32-NEXT: br label [[FOR_END1609]] ; PTR32: for.end1609: Index: llvm/trunk/test/Transforms/IndVarSimplify/lftr-dead-ivs.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/lftr-dead-ivs.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/lftr-dead-ivs.ll @@ -25,14 +25,16 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[P_0:%.*]] = phi i8* [ getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[CONT:%.*]] ] +; CHECK-NEXT: [[I_0:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[TMP4:%.*]], [[CONT:%.*]] ] +; CHECK-NEXT: [[P_0:%.*]] = phi i8* [ getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 0), [[ENTRY]] ], [ [[TMP3:%.*]], [[CONT]] ] ; CHECK-NEXT: [[TMP3]] = getelementptr inbounds i8, i8* [[P_0]], i64 1 ; CHECK-NEXT: br i1 [[ALWAYS_FALSE:%.*]], label [[NEVER_EXECUTED:%.*]], label [[CONT]] ; CHECK: never_executed: ; CHECK-NEXT: store volatile i8 0, i8* [[TMP3]] ; CHECK-NEXT: br label [[CONT]] ; CHECK: cont: -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[TMP3]], getelementptr (i8, i8* getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 0), i64 246) +; CHECK-NEXT: [[TMP4]] = add nuw i8 [[I_0]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8 [[TMP4]], -10 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -110,7 +112,7 @@ ; CHECK-NEXT: [[P_0:%.*]] = phi i8* [ getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[LOOP]] ] ; CHECK-NEXT: store volatile i8 0, i8* [[P_0]] ; CHECK-NEXT: [[TMP3]] = getelementptr inbounds i8, i8* [[P_0]], i64 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[TMP3]], getelementptr (i8, i8* getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 0), i64 246) +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[P_0]], getelementptr (i8, i8* getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 0), i64 245) ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void Index: llvm/trunk/test/Transforms/IndVarSimplify/lftr.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/lftr.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/lftr.ll @@ -167,7 +167,7 @@ ; CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOT0]], align 1 ; CHECK-NEXT: [[TMP3]] = getelementptr inbounds i8, i8* [[P_0]], i64 1 ; CHECK-NEXT: store i8 [[TMP2]], i8* [[P_0]], align 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[TMP3]], getelementptr (i8, i8* getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 0), i64 240) +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[P_0]], getelementptr inbounds ([240 x i8], [240 x i8]* @data, i64 0, i64 239) ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void