Index: lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- lib/Transforms/Scalar/IndVarSimplify.cpp +++ lib/Transforms/Scalar/IndVarSimplify.cpp @@ -31,6 +31,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -42,6 +43,7 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -2074,6 +2076,48 @@ return Phi != getLoopPhiForCounter(IncV, L); } +/// Return true if undefined behavior would provable be executed on the path to +/// OnPathTo if Root produced a posion result. Note that this doesn't say +/// anything about whether OnPathTo is actually executed or whether Root is +/// actually poison. This can be used to assess whether a new use of Root can +/// be added at a location which is control equivalent wit OnPathTo (such as +/// immediately before it) without introducing UB which didn't previously +/// exist. Note that a false result conveys no information. +static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, + Instruction *OnPathTo, + DominatorTree *DT) { + // Basic approach is to assume Root is poison, propagate poison forward + // through all users we can easily track, and then check whether any of those + // users are provable UB and must execute before out exiting block might + // exit. + + // The set of all recursive users we've visited (which are assumed to all be + // poison because of said visit) + SmallSet KnownPoison; + SmallVector Worklist; + Worklist.push_back(Root); + while (!Worklist.empty()) { + const Instruction *I = Worklist.pop_back_val(); + + // If we know this must trigger UB on a path leading to our exit... + if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo)) + return true; + + // If we can't analyze propagation through this instruction, just skip it + // and transitive users. Safe as false is a conservative result. + if (!propagatesFullPoison(I) && I != Root) + continue; + + if (KnownPoison.insert(I).second) + for (const User *User : I->users()) + Worklist.push_back(cast(User)); + } + + // Might be non-UB, or might have a path we couldn't prove must execute on + // way to exiting bb. + return false; +} + /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils /// down to checking that all operands are constant and listing instructions /// that may hide undef. @@ -2162,7 +2206,8 @@ /// valid count without scaling the address stride, so it remains a pointer /// expression as far as SCEV is concerned. static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB, - const SCEV *BECount, ScalarEvolution *SE) { + const SCEV *BECount, + ScalarEvolution *SE, DominatorTree *DT) { uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); Value *Cond = cast(ExitingBB->getTerminator())->getCondition(); @@ -2192,20 +2237,18 @@ if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth)) continue; - // Avoid reusing a potentially undef value to compute other values that may - // have originally had a concrete definition. - if (!hasConcreteDef(Phi)) { - // We explicitly allow unknown phis as long as they are already used by - // the loop test. In this case we assume that performing LFTR could not - // increase the number of undef users. - // TODO: Generalize this to allow *any* loop exit which is known to - // execute on each iteration - if (L->getExitingBlock()) - if (ICmpInst *Cond = getLoopTest(L, ExitingBB)) - if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L) && - Phi != getLoopPhiForCounter(Cond->getOperand(1), L)) - continue; - } + // 1) When swiching IVs, we have to ensure that we don't introduce UB which + // didn't exist in the original program by introducing a use on an + // iteration where said IV produces poison. Our strategy here differs for + // pointers and integer IVs. For integers, we strip and reinfer as needed, + // see code in linearFunctionTestReplace. For pointers, we're currently + // blatantly incorrect for this cornercase. See D62936 for context. + // 2) Avoid reusing a potentially undef value to compute other values that + // may have originally had a concrete definition. + auto *ExitTerm = L->getExitingBlock()->getTerminator(); + if (!mustExecuteUBIfPoisonOnPathTo(Phi, ExitTerm, DT) && + !hasConcreteDef(Phi)) + continue; const SCEV *Init = AR->getStart(); @@ -2643,7 +2686,7 @@ if (BETakenCount->isZero()) continue; - PHINode *IndVar = FindLoopCounter(L, ExitingBB, BETakenCount, SE); + PHINode *IndVar = FindLoopCounter(L, ExitingBB, BETakenCount, SE, DT); if (!IndVar) continue; Index: test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll =================================================================== --- test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll +++ test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll @@ -223,14 +223,16 @@ ; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i32 [[BI]], [[CNT]] ; PTR64-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR64: preheader: +; PTR64-NEXT: [[TMP1:%.*]] = shl i32 [[BI]], 1 +; PTR64-NEXT: [[TMP2:%.*]] = sub i32 [[EI]], [[TMP1]] +; PTR64-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; PTR64-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* [[BUF]], i64 [[TMP3]] ; PTR64-NEXT: br label [[LOOP:%.*]] ; PTR64: loop: ; PTR64-NEXT: [[P_01_US_US:%.*]] = phi i8* [ [[BUF]], [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] -; PTR64-NEXT: [[IV:%.*]] = phi i32 [ [[BI]], [[PREHEADER]] ], [ [[IVNEXT:%.*]], [[LOOP]] ] ; PTR64-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR64-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]] -; PTR64-NEXT: [[IVNEXT]] = add nuw i32 [[IV]], 1 -; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IVNEXT]], [[CNT]] +; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[LFTR_LIMIT]] ; PTR64-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR64: exit.loopexit: ; PTR64-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -248,14 +250,15 @@ ; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i32 [[BI]], [[CNT]] ; PTR32-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR32: preheader: +; PTR32-NEXT: [[TMP1:%.*]] = shl i32 [[BI]], 1 +; PTR32-NEXT: [[TMP2:%.*]] = sub i32 [[EI]], [[TMP1]] +; PTR32-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* [[BUF]], i32 [[TMP2]] ; PTR32-NEXT: br label [[LOOP:%.*]] ; PTR32: loop: ; PTR32-NEXT: [[P_01_US_US:%.*]] = phi i8* [ [[BUF]], [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] -; PTR32-NEXT: [[IV:%.*]] = phi i32 [ [[BI]], [[PREHEADER]] ], [ [[IVNEXT:%.*]], [[LOOP]] ] ; PTR32-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR32-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]] -; PTR32-NEXT: [[IVNEXT]] = add nuw i32 [[IV]], 1 -; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IVNEXT]], [[CNT]] +; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[LFTR_LIMIT]] ; PTR32-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR32: exit.loopexit: ; PTR32-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] Index: test/Transforms/IndVarSimplify/lftr-dead-ivs.ll =================================================================== --- test/Transforms/IndVarSimplify/lftr-dead-ivs.ll +++ test/Transforms/IndVarSimplify/lftr-dead-ivs.ll @@ -105,14 +105,13 @@ define void @dom_store_preinc(i8* %a) #0 { ; CHECK-LABEL: @dom_store_preinc( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* [[A:%.*]], i64 246 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[I_0:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[TMP4:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[P_0:%.*]] = phi i8* [ [[A:%.*]], [[ENTRY]] ], [ [[TMP3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[P_0:%.*]] = phi i8* [ [[A]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[LOOP]] ] ; CHECK-NEXT: store volatile i8 0, i8* [[P_0]] ; CHECK-NEXT: [[TMP3]] = getelementptr inbounds i8, i8* [[P_0]], i64 1 -; CHECK-NEXT: [[TMP4]] = add nuw i8 [[I_0]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8 [[TMP4]], -10 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[TMP3]], [[LFTR_LIMIT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -136,14 +135,13 @@ define void @dom_store_postinc(i8* %a) #0 { ; CHECK-LABEL: @dom_store_postinc( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* [[A:%.*]], i64 246 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[I_0:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[TMP4:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[P_0:%.*]] = phi i8* [ [[A:%.*]], [[ENTRY]] ], [ [[TMP3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[P_0:%.*]] = phi i8* [ [[A]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[TMP3]] = getelementptr inbounds i8, i8* [[P_0]], i64 1 ; CHECK-NEXT: store volatile i8 0, i8* [[TMP3]] -; CHECK-NEXT: [[TMP4]] = add nuw i8 [[I_0]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8 [[TMP4]], -10 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[TMP3]], [[LFTR_LIMIT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -167,14 +165,13 @@ define i8 @dom_load(i8* %a) #0 { ; CHECK-LABEL: @dom_load( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* [[A:%.*]], i64 246 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[I_0:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[TMP4:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[P_0:%.*]] = phi i8* [ [[A:%.*]], [[ENTRY]] ], [ [[TMP3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[P_0:%.*]] = phi i8* [ [[A]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[TMP3]] = getelementptr inbounds i8, i8* [[P_0]], i64 1 ; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[TMP3]] -; CHECK-NEXT: [[TMP4]] = add nuw i8 [[I_0]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8 [[TMP4]], -10 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[TMP3]], [[LFTR_LIMIT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: [[V_LCSSA:%.*]] = phi i8 [ [[V]], [[LOOP]] ] @@ -199,14 +196,15 @@ define i64 @dom_div(i64 %a) #0 { ; CHECK-LABEL: @dom_div( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[A:%.*]] to i8 +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[TMP0]], -10 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[I_0:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[TMP4:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[I_1:%.*]] = phi i64 [ [[A:%.*]], [[ENTRY]] ], [ [[TMP3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[I_1:%.*]] = phi i64 [ [[A]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[TMP3]] = add nuw nsw i64 [[I_1]], 1 ; CHECK-NEXT: [[V:%.*]] = udiv i64 5, [[TMP3]] -; CHECK-NEXT: [[TMP4]] = add nuw i8 [[I_0]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8 [[TMP4]], -10 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[TMP3]] to i8 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8 [[LFTR_WIDEIV]], [[TMP1]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[LOOP]] ] Index: test/Transforms/IndVarSimplify/lftr-reuse.ll =================================================================== --- test/Transforms/IndVarSimplify/lftr-reuse.ll +++ test/Transforms/IndVarSimplify/lftr-reuse.ll @@ -222,9 +222,6 @@ ; Remove %i which is only used by the exit test. ; Verify that SCEV can still compute a backedge count from the sign ; extended %n, used for pointer comparison by LFTR. -; -; TODO: Fix for PR13371 currently makes this impossible. See -; IndVarSimplify.cpp hasConcreteDef(). We may want to change to undef rules. define void @geplftr(i8* %base, i32 %x, i32 %y, i32 %n) nounwind { ; CHECK-LABEL: @geplftr( ; CHECK-NEXT: entry: @@ -236,14 +233,14 @@ ; CHECK-NEXT: [[CMP_PH:%.*]] = icmp ult i32 [[X]], [[LIM]] ; CHECK-NEXT: br i1 [[CMP_PH]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* [[ADD_PTR10]], i64 [[TMP0]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INC:%.*]], [[LOOP]] ], [ [[X]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[APTR:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[LOOP]] ], [ [[ADD_PTR10]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[APTR]], i32 1 ; CHECK-NEXT: store i8 3, i8* [[APTR]] -; CHECK-NEXT: [[INC]] = add nuw i32 [[I]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], [[LIM]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR]], [[LFTR_LIMIT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]]