Index: lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- lib/Transforms/Scalar/IndVarSimplify.cpp +++ lib/Transforms/Scalar/IndVarSimplify.cpp @@ -2717,6 +2717,24 @@ if (isa(MaxExitCount)) return false; + // Visit our exit blocks in order of dominance. We know from the fact that + // all exits (left) are analyzeable that the must be a total dominance order + // between them as each must dominate the latch. The visit order only + // matters for the provably equal case. + llvm::sort(ExitingBlocks, + [&](BasicBlock *A, BasicBlock *B) { + // std::sort sorts in ascending order, so we want the inverse of + // the normal dominance relation. + if (DT->properlyDominates(A, B)) return true; + if (DT->properlyDominates(B, A)) return false; + llvm_unreachable("expected total dominance order!"); + }); +#ifdef ASSERT + for (unsigned i = 1; i < ExitingBlocks.size(); i++) { + assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])); + } +#endif + auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) { BranchInst *BI = cast(ExitingBB->getTerminator()); bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); @@ -2729,6 +2747,7 @@ }; bool Changed = false; + SmallSet DominatingExitCounts; for (BasicBlock *ExitingBB : ExitingBlocks) { const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); assert(!isa(ExitCount) && "checked above"); @@ -2766,10 +2785,15 @@ continue; } - // TODO: If we can prove that the exiting iteration is equal to the exit - // count for this exit and that no previous exit oppurtunities exist within - // the loop, then we can discharge all other exits. (May fall out of - // previous TODO.) + // As we run, keep track of which exit counts we've encountered. If we + // find a duplicate, we've found an exit which would have exited on the + // exiting iteration, but (from the visit order) strictly follows another + // which does the same and is thus dead. + if (!DominatingExitCounts.insert(ExitCount).second) { + FoldExit(ExitingBB, false); + Changed = true; + continue; + } } return Changed; } Index: test/Transforms/IndVarSimplify/eliminate-exit.ll =================================================================== --- test/Transforms/IndVarSimplify/eliminate-exit.ll +++ test/Transforms/IndVarSimplify/eliminate-exit.ll @@ -185,5 +185,39 @@ ret void } +define void @mixed_width(i32 %len) { +; CHECK-LABEL: @mixed_width( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN_ZEXT:%.*]] = zext i32 [[LEN:%.*]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[IV]], [[LEN_ZEXT]] +; CHECK-NEXT: br i1 [[CMP1]], label [[BACKEDGE]], label [[EXIT:%.*]] +; CHECK: backedge: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 true, label [[LOOP]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %len.zext = zext i32 %len to i64 + br label %loop +loop: + %iv = phi i64 [0, %entry], [%iv.next, %backedge] + %iv2 = phi i32 [0, %entry], [%iv2.next, %backedge] + %iv.next = add i64 %iv, 1 + %iv2.next = add i32 %iv2, 1 + %cmp1 = icmp ult i64 %iv, %len.zext + br i1 %cmp1, label %backedge, label %exit + +backedge: + call void @side_effect() + %cmp2 = icmp ult i32 %iv2, %len + br i1 %cmp2, label %loop, label %exit +exit: + ret void +} declare void @side_effect() Index: test/Transforms/IndVarSimplify/loop-predication.ll =================================================================== --- test/Transforms/IndVarSimplify/loop-predication.ll +++ test/Transforms/IndVarSimplify/loop-predication.ll @@ -464,7 +464,6 @@ ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] ; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] @@ -478,7 +477,7 @@ ; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-NEXT: br i1 [[TMP4]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 ; CHECK: deopt2: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -784,7 +783,7 @@ ; If we have a dominating exit (exit1) which can't be itself rewritten, we ; can't rewrite a later exit (exit2). Doing so would cause the loop to exit ; from the exit2 when it should have exited from exit1. -define i32 @neg_dominating_exit(i32* %array, i32 %length, i32 %n) { +define i32 @neg_dominating_exit(i32* %array, i32 %length, i32 %length2, i32 %n) { ; CHECK-LABEL: @neg_dominating_exit( ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -798,7 +797,7 @@ ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 [[RESULT]] ; CHECK: guarded: -; CHECK-NEXT: [[WITHIN_BOUNDS2:%.*]] = icmp ult i32 [[I]], [[LENGTH]] +; CHECK-NEXT: [[WITHIN_BOUNDS2:%.*]] = icmp ult i32 [[I]], [[LENGTH2:%.*]] ; CHECK-NEXT: br i1 [[WITHIN_BOUNDS2]], label [[GUARDED2]], label [[DEOPT2:%.*]], !prof !0 ; CHECK: deopt2: ; CHECK-NEXT: call void @prevent_merging() @@ -830,7 +829,7 @@ ret i32 %result guarded: ; preds = %loop - %within.bounds2 = icmp ult i32 %i, %length + %within.bounds2 = icmp ult i32 %i, %length2 br i1 %within.bounds2, label %guarded2, label %deopt2, !prof !0 deopt2: ; preds = %loop Index: test/Transforms/IndVarSimplify/pr38674.ll =================================================================== --- test/Transforms/IndVarSimplify/pr38674.ll +++ test/Transforms/IndVarSimplify/pr38674.ll @@ -14,10 +14,9 @@ ; CHECK-NEXT: [[ZEXT:%.*]] = zext i16 1 to i32 ; CHECK-NEXT: br label [[FOR_BODY6:%.*]] ; CHECK: for.cond4: -; CHECK-NEXT: [[CMP5:%.*]] = icmp ult i32 [[INC:%.*]], 2 -; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY6]], label [[FOR_END:%.*]] +; CHECK-NEXT: br i1 true, label [[FOR_BODY6]], label [[FOR_END:%.*]] ; CHECK: for.body6: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[FOR_COND4_PREHEADER]] ], [ [[INC]], [[FOR_COND4:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[FOR_COND4_PREHEADER]] ], [ [[INC:%.*]], [[FOR_COND4:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], [[ZEXT]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[TMP0]], label [[RETURN_LOOPEXIT:%.*]], label [[FOR_COND4]]