diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -3002,8 +3002,8 @@ auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc; auto *TI = cast(Candidate.TI); auto *BB = Candidate.TI->getParent(); - assert(InLoopSucc == TI->getSuccessor(0)); - auto *OutOfLoopSucc = TI->getSuccessor(1); + auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1) + : TI->getSuccessor(0); // FIXME: Remove this once limitation on successors is lifted. assert(L.contains(InLoopSucc) && "Not supported yet!"); assert(!L.contains(OutOfLoopSucc) && "Not supported yet!"); diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll @@ -438,6 +438,76 @@ ret i32 -2 } +define i32 @test_02_inverse(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr noundef %arr, ptr noundef %x_p) { +; CHECK-LABEL: @test_02_inverse( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P:%.*]], align 4, !noundef !0 +; CHECK-NEXT: [[INJECTED_COND:%.*]] = icmp ule i32 -2147483648, [[X]] +; CHECK-NEXT: br i1 [[INJECTED_COND]], label [[LOOP_US:%.*]], label [[LOOP:%.*]] +; CHECK: loop.us: +; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[EL_PTR_US:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[IV_US]] +; CHECK-NEXT: [[EL_US:%.*]] = load i32, ptr [[EL_PTR_US]], align 4 +; CHECK-NEXT: [[BOUND_CHECK_US:%.*]] = icmp sge i32 [[EL_US]], 0 +; CHECK-NEXT: br i1 [[BOUND_CHECK_US]], label [[GUARDED_US]], label [[COMMON_RET:%.*]], !prof [[PROF1]] +; CHECK: guarded.us: +; CHECK-NEXT: [[RANGE_CHECK_US:%.*]] = icmp uge i32 [[EL_US]], [[X]] +; CHECK-NEXT: [[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 [[EL_US]] +; CHECK-NEXT: store i32 [[IV_US]], ptr [[ARR_PTR_US]], align 4 +; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1 +; CHECK-NEXT: [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]] +; CHECK-NEXT: br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[COMMON_RET]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; CHECK-NEXT: [[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4 +; CHECK-NEXT: [[BOUND_CHECK:%.*]] = icmp sge i32 [[EL]], 0 +; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[GUARDED:%.*]], label [[COMMON_RET]], !prof [[PROF1]] +; CHECK: guarded: +; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp uge i32 [[EL]], [[X]] +; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !llvm.invariant.condition.injection.disabled !0 +; CHECK: backedge: +; CHECK-NEXT: [[ARR_PTR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[EL]] +; CHECK-NEXT: store i32 [[IV]], ptr [[ARR_PTR]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]] +; CHECK: common.ret: +; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[BACKEDGE]] ], [ 0, [[GUARDED_US]] ], [ -1, [[LOOP]] ], [ -1, [[LOOP_US]] ], [ -2, [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] +; +entry: + %x = load i32, ptr %x_p, align 4, !noundef !{} + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] + %el.ptr = getelementptr i32, ptr %p, i32 %iv + %el = load i32, ptr %el.ptr, align 4 + %bound_check = icmp sge i32 %el, 0 + br i1 %bound_check, label %guarded, label %bound_check_failed, !prof !{!"branch_weights", i32 100, i32 1} + +guarded: ; preds = %loop + %range_check = icmp uge i32 %el, %x + br i1 %range_check, label %range_check_failed, label %backedge, !prof !{!"branch_weights", i32 1, i32 100} + +backedge: ; preds = %guarded + %arr.ptr = getelementptr i32, ptr %arr, i32 %el + store i32 %iv, ptr %arr.ptr, align 4 + %iv.next = add i32 %iv, 1 + %loop_cond = icmp slt i32 %iv.next, %n + br i1 %loop_cond, label %loop, label %exit + +exit: ; preds = %backedge + ret i32 0 + +bound_check_failed: ; preds = %loop + ret i32 -1 + +range_check_failed: ; preds = %guarded + ret i32 -2 +} + define i32 @test_03(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr noundef %arr, ptr noundef %x_p) { ; CHECK-LABEL: @test_03( ; CHECK-NEXT: entry: