Index: lib/Transforms/Scalar/IndVarSimplify.cpp
===================================================================
--- lib/Transforms/Scalar/IndVarSimplify.cpp
+++ lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -31,6 +31,7 @@
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -42,6 +43,7 @@
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -2101,6 +2103,85 @@
   return Phi != getLoopPhiForCounter(IncV, L);
 }
 
+/// Return true if the given instruction must trigger undefined behavior under
+/// the assumption that a) the instructions in Visited are poison, b) at
+/// least one of the instruction's operands is in that set, and c) that PoisonI
+/// is assumed to execute.
+static bool MustTriggerUB(Instruction *PoisonI,
+                          const SmallSet<const Value *, 16> &Visited,
+                          DominatorTree *DT) {
+  // The check below only considers the memory operand of the store to trigger
+  // UB. Per LangRef, the value operand does as well. TODO: extend this to a
+  // broader class. rmw? cas?
+  if (isa<StoreInst>(PoisonI))
+    return true;
+  auto *NotPoison =
+      dyn_cast_or_null<Instruction>(getGuaranteedNonFullPoisonOp(PoisonI));
+  if (NotPoison != nullptr && Visited.count(NotPoison))
+    return true;
+  return false;
+}
+
+static bool propagatesFullPoison(Instruction *I,
+                                 const SmallSet<const Value *, 16> &KnownPoison,
+                                 DominatorTree *DT) {
+  if (propagatesFullPoison(I))
+    return true;
+  // For a phi node, if the poison comes in from a statically dead path, then
+  // the result is never poison. Avoid this case by restricting to cases where
+  // poison enters along a path which must execute for the phi to execute.
+  if (auto *P = dyn_cast<PHINode>(I))
+    for (Value *OpV : P->operands())
+      if (auto *OpI = dyn_cast<Instruction>(OpV))
+        if (KnownPoison.count(OpI) && DT->dominates(OpI, I))
+          return true;
+  return false;
+}
+
+
+/// Return true if undefined behavior would provably be executed on the path to
+/// OnPathTo if Root produced a poison result. Note that this doesn't say
+/// anything about whether OnPathTo is actually executed or whether Root is
+/// actually poison. This can be used to assess whether a new use of Root can
+/// be added at a location which is control equivalent with OnPathTo (such as
+/// immediately before it) without introducing UB which didn't previously
+/// exist. Note that a false result conveys no information.
+static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
+                                          Instruction *OnPathTo,
+                                          DominatorTree *DT) {
+  // Basic approach is to assume Root is poison, propagate poison forward
+  // through all users we can easily track, and then check whether any of those
+  // users are provable UB and must execute before our exiting block might
+  // exit.
+
+  // The set of all recursive users we've visited (which are assumed to all be
+  // poison because of said visit)
+  SmallSet<const Value *, 16> KnownPoison;
+  SmallVector<Instruction *, 16> Worklist;
+  Worklist.push_back(Root);
+  while (!Worklist.empty()) {
+    Instruction *I = Worklist.pop_back_val();
+
+    // If we know this must trigger UB on a path leading to our exit...
+    if (DT->dominates(I, OnPathTo) &&
+        MustTriggerUB(I, KnownPoison, DT))
+      return true;
+
+    // If we can't analyze propagation through this instruction, just skip it
+    // and transitive users. Safe as false is a conservative result.
+    if (!propagatesFullPoison(I, KnownPoison, DT) && I != Root)
+      continue;
+
+    if (KnownPoison.insert(I).second)
+      for (User *User : I->users())
+        Worklist.push_back(cast<Instruction>(User));
+  }
+
+  // Might be non-UB, or might have a path we couldn't prove must execute on
+  // way to exiting bb.
+  return false;
+}
+
 /// Recursive helper for hasConcreteDef().
Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
@@ -2189,7 +2270,7 @@
 /// valid count without scaling the address stride, so it remains a pointer
 /// expression as far as SCEV is concerned.
 static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
-                                ScalarEvolution *SE) {
+                                ScalarEvolution *SE, DominatorTree *DT) {
   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
 
   Value *Cond =
@@ -2220,19 +2301,19 @@
     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
       continue;
 
-    // Avoid reusing a potentially undef value to compute other values that may
-    // have originally had a concrete definition.
-    if (!hasConcreteDef(Phi)) {
-      // We explicitly allow unknown phis as long as they are already used by
-      // the loop test. In this case we assume that performing LFTR could not
-      // increase the number of undef users.
-      if (ICmpInst *Cond = getLoopTest(L)) {
-        if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L) &&
-            Phi != getLoopPhiForCounter(Cond->getOperand(1), L)) {
-          continue;
-        }
-      }
-    }
+    // 1) When switching IVs, we have to ensure that we don't introduce UB which
+    //    didn't exist in the original program by introducing a use on an
+    //    iteration where said IV produces poison. Our strategy here differs for
+    //    pointers and integer IVs. For integers, we strip and reinfer as needed,
+    //    see code in linearFunctionTestReplace. For pointers, we're currently
+    //    blatantly incorrect for this cornercase. See D62936 for context.
+    // 2) Avoid reusing a potentially undef value to compute other values that
+    //    may have originally had a concrete definition.
+    auto *ExitTerm = L->getExitingBlock()->getTerminator();
+    if (!mustExecuteUBIfPoisonOnPathTo(Phi, ExitTerm, DT) &&
+        !hasConcreteDef(Phi))
+      continue;
+
     const SCEV *Init = AR->getStart();
 
     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
@@ -2648,7 +2729,7 @@
   // using it.
We can currently only handle loops with a single exit. if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L)) { - PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE); + PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT); if (IndVar) { // Check preconditions for proper SCEVExpander operation. SCEV does not // express SCEVExpander's dependencies, such as LoopSimplify. Instead any Index: test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll =================================================================== --- test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll +++ test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll @@ -223,14 +223,16 @@ ; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i32 [[BI]], [[CNT]] ; PTR64-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR64: preheader: +; PTR64-NEXT: [[TMP1:%.*]] = shl i32 [[BI]], 1 +; PTR64-NEXT: [[TMP2:%.*]] = sub i32 [[EI]], [[TMP1]] +; PTR64-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; PTR64-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* [[BUF]], i64 [[TMP3]] ; PTR64-NEXT: br label [[LOOP:%.*]] ; PTR64: loop: ; PTR64-NEXT: [[P_01_US_US:%.*]] = phi i8* [ [[BUF]], [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] -; PTR64-NEXT: [[IV:%.*]] = phi i32 [ [[BI]], [[PREHEADER]] ], [ [[IVNEXT:%.*]], [[LOOP]] ] ; PTR64-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR64-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]] -; PTR64-NEXT: [[IVNEXT]] = add nuw i32 [[IV]], 1 -; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IVNEXT]], [[CNT]] +; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[LFTR_LIMIT]] ; PTR64-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR64: exit.loopexit: ; PTR64-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -248,14 +250,15 @@ ; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i32 [[BI]], [[CNT]] ; PTR32-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR32: preheader: +; 
PTR32-NEXT: [[TMP1:%.*]] = shl i32 [[BI]], 1 +; PTR32-NEXT: [[TMP2:%.*]] = sub i32 [[EI]], [[TMP1]] +; PTR32-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* [[BUF]], i32 [[TMP2]] ; PTR32-NEXT: br label [[LOOP:%.*]] ; PTR32: loop: ; PTR32-NEXT: [[P_01_US_US:%.*]] = phi i8* [ [[BUF]], [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] -; PTR32-NEXT: [[IV:%.*]] = phi i32 [ [[BI]], [[PREHEADER]] ], [ [[IVNEXT:%.*]], [[LOOP]] ] ; PTR32-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR32-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]] -; PTR32-NEXT: [[IVNEXT]] = add nuw i32 [[IV]], 1 -; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IVNEXT]], [[CNT]] +; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[LFTR_LIMIT]] ; PTR32-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR32: exit.loopexit: ; PTR32-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] Index: test/Transforms/IndVarSimplify/lftr-reuse.ll =================================================================== --- test/Transforms/IndVarSimplify/lftr-reuse.ll +++ test/Transforms/IndVarSimplify/lftr-reuse.ll @@ -222,9 +222,6 @@ ; Remove %i which is only used by the exit test. ; Verify that SCEV can still compute a backedge count from the sign ; extended %n, used for pointer comparison by LFTR. -; -; TODO: Fix for PR13371 currently makes this impossible. See -; IndVarSimplify.cpp hasConcreteDef(). We may want to change to undef rules. 
define void @geplftr(i8* %base, i32 %x, i32 %y, i32 %n) nounwind { ; CHECK-LABEL: @geplftr( ; CHECK-NEXT: entry: @@ -236,14 +233,14 @@ ; CHECK-NEXT: [[CMP_PH:%.*]] = icmp ult i32 [[X]], [[LIM]] ; CHECK-NEXT: br i1 [[CMP_PH]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[LFTR_LIMIT:%.*]] = getelementptr i8, i8* [[ADD_PTR10]], i64 [[TMP0]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INC:%.*]], [[LOOP]] ], [ [[X]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[APTR:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[LOOP]] ], [ [[ADD_PTR10]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[APTR]], i32 1 ; CHECK-NEXT: store i8 3, i8* [[APTR]] -; CHECK-NEXT: [[INC]] = add nuw i32 [[I]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], [[LIM]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR]], [[LFTR_LIMIT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]]