Index: llvm/lib/Transforms/Utils/SimplifyIndVar.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -284,12 +284,31 @@ SmallVector Users; for (auto *U : ICmp->users()) Users.push_back(cast(U)); + auto invalidateIfProfitable = [&]() { + // In general, we don't need to invalidate here as, at worst, we're leaving + // SCEV in a correct, but inprecise state. Invalidating is expensive as + // it forces recomputation of potentially all SCEVs in the loop on the next + // instruction visited. However, by not invalidating we can end up with + // very sub-optimal results. This heuristic tries to invalidate only when + // we know we've discharged a loop exit condition and have thus discovered + // new information likely to greatly simplify SCEVs in the loop. + for (auto *I : Users) { + if (isa(I) || isa(I)) { + if (ICmpLoop->isLoopExiting(I->getParent())) { + SE->forgetLoop(ICmpLoop); + return; + } + } + } + }; const Instruction *CtxI = findCommonDominator(Users, *DT); if (auto Ev = SE->evaluatePredicateAt(Pred, S, X, CtxI)) { ICmp->replaceAllUsesWith(ConstantInt::getBool(ICmp->getContext(), *Ev)); DeadInsts.emplace_back(ICmp); + invalidateIfProfitable(); LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); } else if (makeIVComparisonInvariant(ICmp, IVOperand)) { + invalidateIfProfitable(); // fallthrough to end of function } else if (ICmpInst::isSigned(OriginalPred) && SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) { @@ -799,27 +818,6 @@ } } -/// Return true if this instruction generates a simple SCEV -/// expression in terms of that IV. -/// -/// This is similar to IVUsers' isInteresting() but processes each instruction -/// non-recursively when the operand is already known to be a simpleIVUser. -/// -static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { - if (!SE->isSCEVable(I->getType())) - return false; - - // Get the symbolic expression for this instruction. - const SCEV *S = SE->getSCEV(I); - - // Only consider affine recurrences. - const SCEVAddRecExpr *AR = dyn_cast(S); - if (AR && AR->getLoop() == L) - return true; - - return false; -} - /// Iteratively perform simplification on a worklist of users /// of the specified induction variable. Each successive simplification may push /// more users which may themselves be candidates for simplification. @@ -901,7 +899,7 @@ V->visitCast(Cast); continue; } - if (isSimpleIVUser(UseInst, L, SE)) { + if (SE->isSCEVable(UseInst->getType())) { pushIVUsers(UseInst, L, Simplified, SimpleIVUsers); } } Index: llvm/test/Transforms/IndVarSimplify/X86/pr35406.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/X86/pr35406.ll +++ llvm/test/Transforms/IndVarSimplify/X86/pr35406.ll @@ -75,21 +75,22 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop1: -; CHECK-NEXT: [[LOCAL_0_:%.*]] = phi i32 [ 8, [[ENTRY:%.*]] ], [ [[I9:%.*]], [[LOOP2_EXIT:%.*]] ] -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LOCAL_0_]], 15 +; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT2:%.*]], [[LOOP2_EXIT:%.*]] ], [ 8, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV1]], 15 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[GENERAL_CASE24:%.*]] ; CHECK: general_case24: ; CHECK-NEXT: br i1 false, label [[LOOP2_PREHEADER:%.*]], label [[LOOP2_EXIT]] ; CHECK: loop2.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = udiv i32 14, [[LOCAL_0_]] -; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 60392, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], -1 -; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 60392 +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV1]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 14, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = udiv i32 60392, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP5]], 60392 ; CHECK-NEXT: br label [[LOOP2:%.*]] ; CHECK: loop2: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP5]], [[LOOP2_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP2]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP6]], [[LOOP2_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP2]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: [[I4:%.*]] = load atomic i64, i64* [[P1:%.*]] unordered, align 8 ; CHECK-NEXT: [[I6:%.*]] = sub i64 [[I4]], [[INDVARS_IV_NEXT]] @@ -98,7 +99,7 @@ ; CHECK: loop2.exit.loopexit: ; CHECK-NEXT: br label [[LOOP2_EXIT]] ; CHECK: loop2.exit: -; CHECK-NEXT: [[I9]] = add nuw nsw i32 [[LOCAL_0_]], 1 +; CHECK-NEXT: [[INDVARS_IV_NEXT2]] = add nuw nsw i64 [[INDVARS_IV1]], 1 ; CHECK-NEXT: br i1 false, label [[EXIT]], label [[LOOP1]] ; CHECK: exit: ; CHECK-NEXT: ret i32 0 Index: llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll +++ llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll @@ -15,11 +15,9 @@ ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @global, i64 0, i64 2), i64 -1 -; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP4]], align 1 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i8* [[TMP4]], getelementptr inbounds ([0 x i8], [0 x i8]* @global, i64 0, i64 500) ; CHECK-NEXT: br i1 [[TMP5]], label [[BB7:%.*]], label [[BB11:%.*]] ; CHECK: bb7: -; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 ; CHECK-NEXT: br i1 true, label [[BB11]], label [[BB3]] ; CHECK: bb11: ; CHECK-NEXT: ret void Index: llvm/test/Transforms/IndVarSimplify/infer-poison-flags.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/infer-poison-flags.ll +++ llvm/test/Transforms/IndVarSimplify/infer-poison-flags.ll @@ -347,8 +347,7 @@ ; CHECK-NEXT: [[I:%.*]] = phi i32 [ -1024, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[I_NEXT]] = ashr i32 [[I]], 1 ; CHECK-NEXT: store i32 [[I]], i32* @A, align 4 -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[I_NEXT]], 1 -; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]] +; CHECK-NEXT: br i1 true, label [[LOOP]], label [[LOOPEXIT:%.*]] ; CHECK: loopexit: ; CHECK-NEXT: ret void ; Index: llvm/test/Transforms/IndVarSimplify/strengthen-overflow.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/strengthen-overflow.ll +++ llvm/test/Transforms/IndVarSimplify/strengthen-overflow.ll @@ -182,11 +182,6 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[K_021:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[K_021]] -; CHECK-NEXT: [[SHR1:%.*]] = ashr exact i32 [[SHL]], 1 -; CHECK-NEXT: [[SHR2:%.*]] = lshr exact i32 [[SHL]], 1 -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[K_021]], 1 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -212,11 +207,6 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[K_021:%.*]] = phi i32 [ 3, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[K_021]] -; CHECK-NEXT: [[SHR1:%.*]] = ashr exact i32 [[SHL]], 2 -; CHECK-NEXT: [[SHR2:%.*]] = lshr exact i32 [[SHL]], 2 -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[K_021]], 1 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -242,11 +232,6 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[K_021:%.*]] = phi i32 [ 2, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[K_021]] -; CHECK-NEXT: [[SHR1:%.*]] = ashr exact i32 [[SHL]], 2 -; CHECK-NEXT: [[SHR2:%.*]] = lshr exact i32 [[SHL]], 2 -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[K_021]], 1 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -272,11 +257,6 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[K_021:%.*]] = phi i32 [ 2, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[K_021]] -; CHECK-NEXT: [[SHR1:%.*]] = ashr i32 [[SHL]], 3 -; CHECK-NEXT: [[SHR2:%.*]] = lshr i32 [[SHL]], 3 -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[K_021]], 1 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void Index: llvm/test/Transforms/IndVarSimplify/zext-nuw.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/zext-nuw.ll +++ llvm/test/Transforms/IndVarSimplify/zext-nuw.ll @@ -23,7 +23,7 @@ ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP3]], [[TMP4]] ; CHECK-NEXT: br label [[DOTPREHEADER4:%.*]] ; CHECK: .preheader4: -; CHECK-NEXT: [[K_09:%.*]] = phi i8* [ undef, [[DOTPREHEADER4_LR_PH]] ], [ [[X25:%.*]], [[X22:%.*]] ] +; CHECK-NEXT: [[K_09:%.*]] = phi i8* [ undef, [[DOTPREHEADER4_LR_PH]] ], [ [[K_1_LCSSA:%.*]], [[X22:%.*]] ] ; CHECK-NEXT: [[X8:%.*]] = icmp ult i32 0, 4 ; CHECK-NEXT: br i1 [[X8]], label [[DOTPREHEADER_LR_PH:%.*]], label [[X22]] ; CHECK: .preheader.lr.ph: @@ -36,8 +36,7 @@ ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[K_09]], i64 [[TMP5]] ; CHECK-NEXT: br label [[X22]] ; CHECK: x22: -; CHECK-NEXT: [[K_1_LCSSA:%.*]] = phi i8* [ [[SCEVGEP]], [[DOT_CRIT_EDGE_8]] ], [ [[K_09]], [[DOTPREHEADER4]] ] -; CHECK-NEXT: [[X25]] = getelementptr i8, i8* [[K_1_LCSSA]] +; CHECK-NEXT: [[K_1_LCSSA]] = phi i8* [ [[SCEVGEP]], [[DOT_CRIT_EDGE_8]] ], [ [[K_09]], [[DOTPREHEADER4]] ] ; CHECK-NEXT: br label [[DOTPREHEADER4]] ; %x2 = load i32, i32* @d Index: llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll +++ llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll @@ -4367,10 +4367,8 @@ ; EPILOG-BLOCK: outerloop: ; EPILOG-BLOCK-NEXT: %i = phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit.1 ] ; EPILOG-BLOCK-NEXT: %0 = sub i64 100, %i -; EPILOG-BLOCK-NEXT: %1 = sub i64 99, %i ; EPILOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 -; EPILOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 -; EPILOG-BLOCK-NEXT: br i1 %2, label %exit.unr-lcssa, label %outerloop.new +; EPILOG-BLOCK-NEXT: br i1 false, label %exit.unr-lcssa, label %outerloop.new ; EPILOG-BLOCK: outerloop.new: ; EPILOG-BLOCK-NEXT: %unroll_iter = sub i64 %0, %xtraiter ; EPILOG-BLOCK-NEXT: br label %innerH @@ -4512,7 +4510,6 @@ ; PROLOG-BLOCK: outerloop: ; PROLOG-BLOCK-NEXT: %i = phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit.1 ] ; PROLOG-BLOCK-NEXT: %0 = sub i64 100, %i -; PROLOG-BLOCK-NEXT: %1 = sub i64 99, %i ; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 ; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %innerH.prol.preheader, label %innerH.prol.loopexit @@ -4525,8 +4522,7 @@ ; PROLOG-BLOCK-NEXT: br label %innerH.prol.loopexit ; PROLOG-BLOCK: innerH.prol.loopexit: ; PROLOG-BLOCK-NEXT: %i3.unr = phi i64 [ %i, %outerloop ], [ %i4.prol, %latch.prol ] -; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 -; PROLOG-BLOCK-NEXT: br i1 %2, label %exit.loopexit, label %outerloop.new +; PROLOG-BLOCK-NEXT: br i1 false, label %exit.loopexit, label %outerloop.new ; PROLOG-BLOCK: outerloop.new: ; PROLOG-BLOCK-NEXT: br label %innerH ; PROLOG-BLOCK: innerH: Index: llvm/test/Transforms/LoopUnroll/scevunroll.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/scevunroll.ll +++ llvm/test/Transforms/LoopUnroll/scevunroll.ll @@ -170,7 +170,7 @@ ; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[BASE:%.*]], align 4 ; CHECK-NEXT: br label [[L2:%.*]] ; CHECK: l2: -; CHECK-NEXT: br label [[L3:%.*]] +; CHECK-NEXT: br i1 true, label [[L3:%.*]], label [[EXIT2:%.*]] ; CHECK: l3: ; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[VAL]], 0 ; CHECK-NEXT: br i1 [[CMP3]], label [[L1_1:%.*]], label [[EXIT3:%.*]] @@ -185,7 +185,7 @@ ; CHECK-NEXT: [[VAL_1:%.*]] = load i32, i32* [[ADR_1]], align 4 ; CHECK-NEXT: br label [[L2_1:%.*]] ; CHECK: l2.1: -; CHECK-NEXT: br label [[L3_1:%.*]] +; CHECK-NEXT: br i1 true, label [[L3_1:%.*]], label [[EXIT2]] ; CHECK: l3.1: ; CHECK-NEXT: [[CMP3_1:%.*]] = icmp ne i32 [[VAL_1]], 0 ; CHECK-NEXT: br i1 [[CMP3_1]], label [[L1_2:%.*]], label [[EXIT3]] @@ -194,7 +194,7 @@ ; CHECK-NEXT: [[VAL_2:%.*]] = load i32, i32* [[ADR_2]], align 4 ; CHECK-NEXT: br label [[L2_2:%.*]] ; CHECK: l2.2: -; CHECK-NEXT: br label [[L3_2:%.*]] +; CHECK-NEXT: br i1 true, label [[L3_2:%.*]], label [[EXIT2]] ; CHECK: l3.2: ; CHECK-NEXT: [[CMP3_2:%.*]] = icmp ne i32 [[VAL_2]], 0 ; CHECK-NEXT: br i1 [[CMP3_2]], label [[L1_3:%.*]], label [[EXIT3]] @@ -203,7 +203,7 @@ ; CHECK-NEXT: [[VAL_3:%.*]] = load i32, i32* [[ADR_3]], align 4 ; CHECK-NEXT: br label [[L2_3:%.*]] ; CHECK: l2.3: -; CHECK-NEXT: br label [[L3_3:%.*]] +; CHECK-NEXT: br i1 true, label [[L3_3:%.*]], label [[EXIT2]] ; CHECK: l3.3: ; CHECK-NEXT: [[CMP3_3:%.*]] = icmp ne i32 [[VAL_3]], 0 ; CHECK-NEXT: br i1 [[CMP3_3]], label [[L1_4:%.*]], label [[EXIT3]] @@ -212,14 +212,14 @@ ; CHECK-NEXT: [[VAL_4:%.*]] = load i32, i32* [[ADR_4]], align 4 ; CHECK-NEXT: br label [[L2_4:%.*]] ; CHECK: l2.4: -; CHECK-NEXT: br label [[L3_4:%.*]] +; CHECK-NEXT: br i1 true, label [[L3_4:%.*]], label [[EXIT2]] ; CHECK: l3.4: ; CHECK-NEXT: [[CMP3_4:%.*]] = icmp ne i32 [[VAL_4]], 0 ; CHECK-NEXT: br i1 [[CMP3_4]], label [[L1_5:%.*]], label [[EXIT3]] ; CHECK: l1.5: ; CHECK-NEXT: br i1 false, label [[L2_5:%.*]], label [[EXIT1:%.*]] ; CHECK: l2.5: -; CHECK-NEXT: br i1 true, label [[L3_5:%.*]], label [[EXIT2:%.*]] +; CHECK-NEXT: br i1 true, label [[L3_5:%.*]], label [[EXIT2]] ; CHECK: l3.5: ; CHECK-NEXT: br label [[EXIT3]] ; @@ -314,7 +314,7 @@ ; CHECK: for.cond: ; CHECK-NEXT: br i1 false, label [[RETURN:%.*]], label [[FOR_BODY_1:%.*]] ; CHECK: return: -; CHECK-NEXT: [[B_03_LCSSA:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 8, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1:%.*]] ] +; CHECK-NEXT: [[B_03_LCSSA:%.*]] = phi i32 [ 8, [[FOR_COND]] ], [ 8, [[FOR_BODY_1]] ], [ 8, [[FOR_COND_1:%.*]] ] ; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1]] ] ; CHECK-NEXT: store i32 [[B_03_LCSSA]], i32* [[A:%.*]], align 4 ; CHECK-NEXT: ret void