Index: lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- lib/Transforms/Scalar/IndVarSimplify.cpp +++ lib/Transforms/Scalar/IndVarSimplify.cpp @@ -2768,7 +2768,7 @@ !isSafeToExpand(ExactBTC, *SE)) return Changed; - auto Filter = [&](BasicBlock *ExitingBB) { + auto BadExit = [&](BasicBlock *ExitingBB) { // If our exiting block exits multiple loops, we can only rewrite the // innermost one. Otherwise, we're changing how many times the innermost // loop runs before it exits. @@ -2800,15 +2800,43 @@ return false; }; - auto Erased = std::remove_if(ExitingBlocks.begin(), ExitingBlocks.end(), - Filter); - ExitingBlocks.erase(Erased, ExitingBlocks.end()); + + // If we have any exits which can't be predicated themselves, than we can't + // predicate any exit which isn't guaranteed to execute before it. Consider + // two exits (a) and (b) which would both exit on the same iteration. If we + // can predicate (b), but not (a), and (a) preceeds (b) along some path, then + // we could convert a loop from exiting through (a) to one exiting through + // (b). Note that this problem exists only for exits with the same exit + // count, and we could be more aggressive when exit counts are known inequal. + llvm::sort(ExitingBlocks, + [&](BasicBlock *A, BasicBlock *B) { + // std::sort sorts in ascending order, so we want the inverse of + // the normal dominance relation, plus a tie breaker for blocks + // unordered by dominance. + if (DT->properlyDominates(A, B)) return true; + if (DT->properlyDominates(B, A)) return false; + return A->getName() < B->getName(); + }); + // Check to see if our exit blocks are a total order (i.e. a linear chain of + // exits before the backedge). If they aren't, reasoning about reachability + // is complicated and we choose not to for now. + for (unsigned i = 1; i < ExitingBlocks.size(); i++) + if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])) + return Changed; + + // Given our sorted total order, we know that exit[j] must be evaluated + // after all exit[i] such j > i. + for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++) + if (BadExit(ExitingBlocks[i])) { + ExitingBlocks.resize(i); + break; + } if (ExitingBlocks.empty()) return Changed; // We rely on not being able to reach an exiting block on a later iteration - // than it's statically compute exit count. The implementaton of + // then it's statically compute exit count. The implementaton of // getExitCount currently has this invariant, but assert it here so that // breakage is obvious if this ever changes.. assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) { Index: test/Transforms/IndVarSimplify/loop-predication.ll =================================================================== --- test/Transforms/IndVarSimplify/loop-predication.ll +++ test/Transforms/IndVarSimplify/loop-predication.ll @@ -788,24 +788,19 @@ define i32 @neg_dominating_exit(i32* %array, i32 %length, i32 %n) { ; CHECK-LABEL: @neg_dominating_exit( ; CHECK-NEXT: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] -; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED2:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED2]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]] +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] ; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt: ; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC]], [[LOOP]] ] ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 [[RESULT]] ; CHECK: guarded: -; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED2]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: [[WITHIN_BOUNDS2:%.*]] = icmp ult i32 [[I]], [[LENGTH]] +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS2]], label [[GUARDED2]], label [[DEOPT2:%.*]], !prof !0 ; CHECK: deopt2: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -815,7 +810,7 @@ ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 -; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: [[RESULT2:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED2]] ]