Index: llvm/lib/Transforms/Utils/SimplifyIndVar.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -1540,6 +1540,41 @@ bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap(); bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap(); auto AnotherOpExtKind = ExtKind; + + // The facts we are going to prove need to be true at the point that + // dominates all users. Find this point while checking the users. + const Instruction *Context = nullptr; + // Check that all uses are either s/zext, or narrow def (in case of we are + // widening the IV increment). + SmallVector ExtUsers; + for (Use &U : NarrowUse->uses()) { + if (U.getUser() == NarrowDef) + continue; + Instruction *User = nullptr; + if (ExtKind == SignExtended) + User = dyn_cast(U.getUser()); + else + User = dyn_cast(U.getUser()); + if (!User || User->getType() != WideType) + return false; + ExtUsers.push_back(User); + if (!Context) + // Pick first user as context. + Context = User; + else if (DT->dominates(User, Context)) + Context = User; + else if (!DT->dominates(Context, User)) + // For users that don't have dominance relation, use common dominator. + Context = + DT->findNearestCommonDominator(Context->getParent(), + User->getParent())->getTerminator(); + } + // No users, the instruction is dead. + if (!Context) { + DeadInsts.emplace_back(NarrowUse); + return true; + } + if (!CanSignExtend && !CanZeroExtend) { // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we // will most likely not see it. Let's try to prove it. @@ -1552,7 +1587,7 @@ if (!SE->isKnownNegative(RHS)) return false; bool ProvedSubNUW = SE->isKnownPredicateAt( - ICmpInst::ICMP_UGE, LHS, SE->getNegativeSCEV(RHS), NarrowUse); + ICmpInst::ICMP_UGE, LHS, SE->getNegativeSCEV(RHS), Context); if (!ProvedSubNUW) return false; // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as @@ -1566,22 +1601,6 @@ if (!AddRecOp1 || AddRecOp1->getLoop() != L) return false; - // Check that all uses are either s/zext, or narrow def (in case of we are - // widening the IV increment). - SmallVector ExtUsers; - for (Use &U : NarrowUse->uses()) { - if (U.getUser() == NarrowDef) - continue; - Instruction *User = nullptr; - if (ExtKind == SignExtended) - User = dyn_cast(U.getUser()); - else - User = dyn_cast(U.getUser()); - if (!User || User->getType() != WideType) - return false; - ExtUsers.push_back(User); - } - LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); // Generating a widening use instruction. Index: llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll +++ llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll @@ -554,16 +554,13 @@ ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[IV_NEXT:%.*]] = add i32 [[TMP1]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 ; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[BACKEDGE]] ; CHECK: backedge: -; CHECK-NEXT: [[INDEX:%.*]] = zext i32 [[IV_NEXT]] to i64 -; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[TMP1]] ; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4 -; CHECK-NEXT: [[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q]], align 4 +; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q:%.*]], align 4 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]]