Index: llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -117,14 +117,43 @@
     cl::desc("If enabled, the freeze instruction will be added to condition "
              "of loop unswitch to prevent miscompilation."));
 
+static cl::opt<bool> UnswitchVirtualInvariants(
+    "unswitch-virtual-invariants", cl::Hidden,
+    cl::desc("Whether we should insert virtual invariants and unswitch them."),
+    cl::init(false));
+
 namespace {
+struct CompareDesc {
+  BranchInst *Term;
+  Value *Invariant;
+  BasicBlock *InLoopSucc;
+
+  CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
+      : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
+};
+
+struct VirtualInvariant {
+  ICmpInst::Predicate Pred;
+  Value *LHS;
+  Value *RHS;
+  BasicBlock *InLoopSucc;
+
+  VirtualInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
+                   BasicBlock *InLoopSucc)
+      : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
+};
+
 struct NonTrivialUnswitchCandidate {
   Instruction *TI = nullptr;
   TinyPtrVector<Value *> Invariants;
   Optional<InstructionCost> Cost;
+  Optional<VirtualInvariant> Virtual;
   NonTrivialUnswitchCandidate(Instruction *TI, ArrayRef<Value *> Invariants,
-                              Optional<InstructionCost> Cost = None)
-      : TI(TI), Invariants(Invariants), Cost(Cost) {};
+                              Optional<InstructionCost> Cost = None,
+                              Optional<VirtualInvariant> Virtual = None)
+      : TI(TI), Invariants(Invariants), Cost(Cost), Virtual(Virtual) {};
+
+  bool isVirtual() const { return Virtual.has_value(); }
 };
 } // end anonymous namespace.
@@ -2826,6 +2855,144 @@
   return !UnswitchCandidates.empty();
 }
 
+static bool matchPredicate(ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS,
+                           BasicBlock *&IfTrue, BasicBlock *&IfFalse, Loop &L) {
+  if (!L.contains(IfTrue)) {
+    Pred = ICmpInst::getInversePredicate(Pred);
+    std::swap(IfTrue, IfFalse);
+  } else if (L.contains(IfFalse))
+    return false;
+
+  if (L.isLoopInvariant(LHS)) {
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+    std::swap(LHS, RHS);
+  }
+
+  if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) {
+    // Turn "x >=s 0" into "x <u SIGNED_MIN".
+    Pred = ICmpInst::ICMP_ULT;
+    RHS = ConstantInt::get(
+        RHS->getContext(),
+        APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth()));
+  }
+
+  if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
+    return false;
+  return true;
+}
+
+static NonTrivialUnswitchCandidate
+materializeVirtualCandidate(NonTrivialUnswitchCandidate Virtual, Loop &L,
+                            DominatorTree &DT, LoopInfo &LI,
+                            AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
+  assert(Virtual.isVirtual());
+  BasicBlock *Preheader = L.getLoopPreheader();
+  assert(Preheader && "Loop is not in simplified form?");
+  IRBuilder<> Builder(Preheader->getTerminator());
+
+  auto Pred = Virtual.Virtual->Pred;
+  auto *LHS = Virtual.Virtual->LHS;
+  auto *RHS = Virtual.Virtual->RHS;
+  auto *InLoopSucc = Virtual.Virtual->InLoopSucc;
+
+  assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!");
+  if (LHS->getType() != RHS->getType()) {
+    if (LHS->getType()->getIntegerBitWidth() <
+        RHS->getType()->getIntegerBitWidth())
+      LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide");
+    else
+      RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide");
+  }
+
+  auto *VirtualCond = Builder.CreateICmp(Pred, LHS, RHS, "virtual.cond");
+  auto *BB = Virtual.TI->getParent();
+  auto BBSplit = SplitBlock(BB, BB->getTerminator(), &DT, &LI, MSSAU,
+                            BB->getName() + ".split", /*Before*/ true);
+  SmallVector<DominatorTree::UpdateType> DTUpdates = {
+      { DominatorTree::Insert, BBSplit, InLoopSucc }
+  };
+  Builder.SetInsertPoint(BBSplit->getTerminator());
+  auto *NewTerm = Builder.CreateCondBr(VirtualCond, InLoopSucc, BB);
+  BBSplit->getTerminator()->eraseFromParent();
+  DT.applyUpdates(DTUpdates);
+  if (MSSAU)
+    MSSAU->applyUpdates(DTUpdates, DT);
+  // Prevent infinite unswitching.
+  BB->getTerminator()->setMetadata("llvm.virtual.unswitching.disabled",
+                                   MDNode::get(BB->getContext(), {}));
+  return NonTrivialUnswitchCandidate(NewTerm, { VirtualCond }, Virtual.Cost);
+}
+
+static bool insertVirtualCandidates(
+    SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
+    ICmpInst::Predicate Pred, ArrayRef<CompareDesc> Compares,
+    const DominatorTree &DT) {
+
+  assert(ICmpInst::isRelational(Pred));
+  assert(ICmpInst::isStrictPredicate(Pred));
+  if (Compares.size() < 2)
+    return false;
+  ICmpInst::Predicate NonStrictPred = ICmpInst::getNonStrictPredicate(Pred);
+  for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
+       Next != Compares.end(); ++Prev, ++Next) {
+    Value *LHS = Next->Invariant;
+    Value *RHS = Prev->Invariant;
+    BasicBlock *InLoopSucc = Prev->InLoopSucc;
+    VirtualInvariant VI(NonStrictPred, LHS, RHS, InLoopSucc);
+    NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS }, None, VI);
+    UnswitchCandidates.push_back(std::move(Candidate));
+  }
+  return true;
+}
+
+static bool collectVirtualUnswitchCandidates(
+    SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
+    IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
+    const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
+    const MemorySSAUpdater *MSSAU) {
+  if (!UnswitchVirtualInvariants)
+    return false;
+
+  if (!DT.isReachableFromEntry(L.getHeader()))
+    return false;
+  auto *Latch = L.getLoopLatch();
+  if (!Latch)
+    return false;
+
+  DenseMap<Value *, SmallVector<CompareDesc, 2> > CandidatesULT;
+  for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
+       DTN = DTN->getIDom()) {
+    ICmpInst::Predicate Pred;
+    Value *LHS = nullptr, *RHS = nullptr;
+    BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
+    auto *BB = DTN->getBlock();
+    auto *Term = BB->getTerminator();
+    if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
+                          m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))))
+      continue;
+    if (Term->hasMetadata("llvm.virtual.unswitching.disabled"))
+      continue;
+    if (!matchPredicate(Pred, LHS, RHS, IfTrue, IfFalse, L))
+      continue;
+    // TODO: Support other predicates.
+    if (Pred != ICmpInst::ICMP_ULT)
+      continue;
+    if (!L.contains(IfTrue) || L.contains(IfFalse))
+      continue;
+    CompareDesc Desc(cast<BranchInst>(Term), RHS, IfTrue);
+    while (auto *Zext = dyn_cast<ZExtInst>(LHS))
+      LHS = Zext->getOperand(0);
+    CandidatesULT[LHS].push_back(Desc);
+  }
+
+  bool Changed = false;
+  for (auto &It : CandidatesULT)
+    Changed |= insertVirtualCandidates(UnswitchCandidates, L,
+                                       ICmpInst::ICMP_ULT, It.second, DT);
+  return Changed;
+}
+
 static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) {
   if (!L.isSafeToClone())
     return false;
@@ -2985,10 +3152,11 @@
     Instruction &TI = *Candidate.TI;
     ArrayRef<Value *> Invariants = Candidate.Invariants;
     BranchInst *BI = dyn_cast<BranchInst>(&TI);
-    InstructionCost CandidateCost = ComputeUnswitchedCost(
-        TI, /*FullUnswitch*/ !BI ||
-                (Invariants.size() == 1 &&
-                 Invariants[0] == skipTrivialSelect(BI->getCondition())));
+    bool FullUnswitch =
+        !BI || Candidate.isVirtual() ||
+        (Invariants.size() == 1 &&
+         Invariants[0] == skipTrivialSelect(BI->getCondition()));
+    InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
     // Calculate cost multiplier which is a tool to limit potentially
     // exponential behavior of loop-unswitch.
     if (EnableUnswitchCostMultiplier) {
@@ -3027,8 +3195,11 @@
   IVConditionInfo PartialIVInfo;
   Instruction *PartialIVCondBranch = nullptr;
   // If we didn't find any candidates, we're done.
-  if (!collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
-                                 PartialIVCondBranch, L, LI, AA, MSSAU))
+  collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
+                            PartialIVCondBranch, L, LI, AA, MSSAU);
+  collectVirtualUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
+                                   PartialIVCondBranch, L, DT, LI, AA, MSSAU);
+  if (UnswitchCandidates.empty())
     return false;
 
   LLVM_DEBUG(
@@ -3037,7 +3208,6 @@
 
   NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
      UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
-
   assert(Best.TI && "Failed to find loop unswitch candidate");
   assert(Best.Cost && "Failed to compute cost");
 
@@ -3047,6 +3217,10 @@
     return false;
   }
 
+  if (Best.isVirtual())
+    Best = materializeVirtualCandidate(Best, L, DT, LI, AC, MSSAU);
+  assert(!Best.isVirtual() && "Should be materialized by now!");
+
   if (Best.TI != PartialIVCondBranch)
     PartialIVInfo.InstToDuplicate.clear();
Index: llvm/test/Transforms/SimpleLoopUnswitch/insert-invariant-conditions.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SimpleLoopUnswitch/insert-invariant-conditions.ll
@@ -0,0 +1,286 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -enable-nontrivial-unswitch -unswitch-virtual-invariants -S -passes="loop(simple-loop-unswitch),simplifycfg" | FileCheck %s
+; RUN: opt < %s -enable-nontrivial-unswitch -unswitch-virtual-invariants -S -passes="loop-mssa(simple-loop-unswitch),simplifycfg" -verify-memoryssa | FileCheck %s
+
+define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr noundef %arr, ptr noundef %len_p) {
+; CHECK-LABEL: @test_01(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[LEN_P:%.*]], align 4, !noundef !0
+; CHECK-NEXT: [[VIRTUAL_COND:%.*]] = icmp ule i32 [[LIMIT:%.*]], [[LEN]]
+; CHECK-NEXT: br i1 [[VIRTUAL_COND]], label [[LOOP_US:%.*]], label [[LOOP:%.*]]
+; CHECK: loop.us:
+; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], [[GUARDED_SPLIT_US:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[EL_PTR_US:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[IV_US]]
+; CHECK-NEXT: [[EL_US:%.*]] = load i32, ptr [[EL_PTR_US]], align 4
+; CHECK-NEXT: [[BOUND_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[LIMIT]]
+; CHECK-NEXT: br i1 [[BOUND_CHECK_US]], label [[GUARDED_SPLIT_US]], label [[COMMON_RET:%.*]]
+; CHECK: guarded.split.us:
+; CHECK-NEXT: [[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[LEN]]
+; CHECK-NEXT: [[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 [[EL_US]]
+; CHECK-NEXT: store i32 [[IV_US]], ptr [[ARR_PTR_US]], align 4
+; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT: [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[COMMON_RET]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
+; CHECK-NEXT: [[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
+; CHECK-NEXT: [[BOUND_CHECK:%.*]] = icmp ult i32 [[EL]], [[LIMIT]]
+; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[GUARDED_SPLIT:%.*]], label [[COMMON_RET]]
+; CHECK: guarded.split:
+; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[EL]], [[LEN]]
+; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !llvm.virtual.unswitching.disabled !0
+; CHECK: backedge:
+; CHECK-NEXT: [[ARR_PTR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[EL]]
+; CHECK-NEXT: store i32 [[IV]], ptr [[ARR_PTR]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]]
+; CHECK: common.ret:
+; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[BACKEDGE]] ], [ 0, [[GUARDED_SPLIT_US]] ], [ -1, [[LOOP]] ], [ -1, [[LOOP_US]] ], [ -2, [[GUARDED_SPLIT]] ]
+; CHECK-NEXT: ret i32 [[COMMON_RET_OP]]
+;
+entry:
+  %len = load i32, ptr %len_p, align 4, !noundef !{}
+  br label %loop
+
+loop: ; preds = %backedge, %entry
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+  %el.ptr = getelementptr i32, ptr %p, i32 %iv
+  %el = load i32, ptr %el.ptr, align 4
+  %bound_check = icmp ult i32 %el, %limit
+  br i1 %bound_check, label %guarded, label %bound_check_failed
+
+guarded: ; preds = %loop
+  %range_check = icmp ult i32 %el, %len
+  br i1 %range_check, label %backedge, label %range_check_failed
+
+backedge: ; preds = %guarded
+  %arr.ptr = getelementptr i32, ptr %arr, i32 %el
+  store i32 %iv, ptr %arr.ptr, align 4
+  %iv.next = add i32 %iv, 1
+  %loop_cond = icmp slt i32 %iv.next, %n
+  br i1 %loop_cond, label %loop, label %exit
+
+exit: ; preds = %backedge
+  ret i32 0
+
+bound_check_failed: ; preds = %loop
+  ret i32 -1
+
+range_check_failed: ; preds = %guarded
+  ret i32 -2
+}
+
+define i32 @test_02(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr noundef %arr, ptr noundef %len_p) {
+; CHECK-LABEL: @test_02(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[LEN_P:%.*]], align 4, !noundef !0
+; CHECK-NEXT: [[VIRTUAL_COND:%.*]] = icmp ule i32 -2147483648, [[LEN]]
+; CHECK-NEXT: br i1 [[VIRTUAL_COND]], label [[LOOP_US:%.*]], label [[LOOP:%.*]]
+; CHECK: loop.us:
+; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], [[GUARDED_SPLIT_US:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[EL_PTR_US:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[IV_US]]
+; CHECK-NEXT: [[EL_US:%.*]] = load i32, ptr [[EL_PTR_US]], align 4
+; CHECK-NEXT: [[BOUND_CHECK_US:%.*]] = icmp sge i32 [[EL_US]], 0
+; CHECK-NEXT: br i1 [[BOUND_CHECK_US]], label [[GUARDED_SPLIT_US]], label [[COMMON_RET:%.*]]
+; CHECK: guarded.split.us:
+; CHECK-NEXT: [[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[LEN]]
+; CHECK-NEXT: [[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 [[EL_US]]
+; CHECK-NEXT: store i32 [[IV_US]], ptr [[ARR_PTR_US]], align 4
+; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT: [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[COMMON_RET]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
+; CHECK-NEXT: [[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
+; CHECK-NEXT: [[BOUND_CHECK:%.*]] = icmp sge i32 [[EL]], 0
+; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[GUARDED_SPLIT:%.*]], label [[COMMON_RET]]
+; CHECK: guarded.split:
+; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[EL]], [[LEN]]
+; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !llvm.virtual.unswitching.disabled !0
+; CHECK: backedge:
+; CHECK-NEXT: [[ARR_PTR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[EL]]
+; CHECK-NEXT: store i32 [[IV]], ptr [[ARR_PTR]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]]
+; CHECK: common.ret:
+; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[BACKEDGE]] ], [ 0, [[GUARDED_SPLIT_US]] ], [ -1, [[LOOP]] ], [ -1, [[LOOP_US]] ], [ -2, [[GUARDED_SPLIT]] ]
+; CHECK-NEXT: ret i32 [[COMMON_RET_OP]]
+;
+entry:
+  %len = load i32, ptr %len_p, align 4, !noundef !{}
+  br label %loop
+
+loop: ; preds = %backedge, %entry
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+  %el.ptr = getelementptr i32, ptr %p, i32 %iv
+  %el = load i32, ptr %el.ptr, align 4
+  %bound_check = icmp sge i32 %el, 0
+  br i1 %bound_check, label %guarded, label %bound_check_failed
+
+guarded: ; preds = %loop
+  %range_check = icmp ult i32 %el, %len
+  br i1 %range_check, label %backedge, label %range_check_failed
+
+backedge: ; preds = %guarded
+  %arr.ptr = getelementptr i32, ptr %arr, i32 %el
+  store i32 %iv, ptr %arr.ptr, align 4
+  %iv.next = add i32 %iv, 1
+  %loop_cond = icmp slt i32 %iv.next, %n
+  br i1 %loop_cond, label %loop, label %exit
+
+exit: ; preds = %backedge
+  ret i32 0
+
+bound_check_failed: ; preds = %loop
+  ret i32 -1
+
+range_check_failed: ; preds = %guarded
+  ret i32 -2
+}
+
+define i32 @test_03(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr noundef %arr, ptr noundef %len_p) {
+; CHECK-LABEL: @test_03(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[LEN_P:%.*]], align 4, !noundef !0
+; CHECK-NEXT: [[VIRTUAL_COND:%.*]] = icmp ule i32 -2147483648, [[LEN]]
+; CHECK-NEXT: br i1 [[VIRTUAL_COND]], label [[LOOP_US:%.*]], label [[LOOP:%.*]]
+; CHECK: loop.us:
+; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], [[GUARDED_SPLIT_US:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[EL_PTR_US:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[IV_US]]
+; CHECK-NEXT: [[EL_US:%.*]] = load i32, ptr [[EL_PTR_US]], align 4
+; CHECK-NEXT: [[BOUND_CHECK_US:%.*]] = icmp slt i32 [[EL_US]], 0
+; CHECK-NEXT: br i1 [[BOUND_CHECK_US]], label [[COMMON_RET:%.*]], label [[GUARDED_SPLIT_US]]
+; CHECK: guarded.split.us:
+; CHECK-NEXT: [[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[LEN]]
+; CHECK-NEXT: [[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 [[EL_US]]
+; CHECK-NEXT: store i32 [[IV_US]], ptr [[ARR_PTR_US]], align 4
+; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT: [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[COMMON_RET]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
+; CHECK-NEXT: [[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
+; CHECK-NEXT: [[BOUND_CHECK:%.*]] = icmp slt i32 [[EL]], 0
+; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[COMMON_RET]], label [[GUARDED_SPLIT:%.*]]
+; CHECK: guarded.split:
+; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[EL]], [[LEN]]
+; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !llvm.virtual.unswitching.disabled !0
+; CHECK: backedge:
+; CHECK-NEXT: [[ARR_PTR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[EL]]
+; CHECK-NEXT: store i32 [[IV]], ptr [[ARR_PTR]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]]
+; CHECK: common.ret:
+; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[BACKEDGE]] ], [ 0, [[GUARDED_SPLIT_US]] ], [ -1, [[LOOP]] ], [ -1, [[LOOP_US]] ], [ -2, [[GUARDED_SPLIT]] ]
+; CHECK-NEXT: ret i32 [[COMMON_RET_OP]]
+;
+entry:
+  %len = load i32, ptr %len_p, align 4, !noundef !{}
+  br label %loop
+
+loop: ; preds = %backedge, %entry
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+  %el.ptr = getelementptr i32, ptr %p, i32 %iv
+  %el = load i32, ptr %el.ptr, align 4
+  %bound_check = icmp slt i32 %el, 0
+  br i1 %bound_check, label %bound_check_failed, label %guarded
+
+guarded: ; preds = %loop
+  %range_check = icmp ult i32 %el, %len
+  br i1 %range_check, label %backedge, label %range_check_failed
+
+backedge: ; preds = %guarded
+  %arr.ptr = getelementptr i32, ptr %arr, i32 %el
+  store i32 %iv, ptr %arr.ptr, align 4
+  %iv.next = add i32 %iv, 1
+  %loop_cond = icmp slt i32 %iv.next, %n
+  br i1 %loop_cond, label %loop, label %exit
+
+exit: ; preds = %backedge
+  ret i32 0
+
+bound_check_failed: ; preds = %loop
+  ret i32 -1
+
+range_check_failed: ; preds = %guarded
+  ret i32 -2
+}
+
+define i32 @test_04(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr noundef %arr, ptr noundef %len_p) {
+; CHECK-LABEL: @test_04(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[LEN_P:%.*]], align 4, !noundef !0
+; CHECK-NEXT: [[VIRTUAL_COND:%.*]] = icmp ule i32 128, [[LEN]]
+; CHECK-NEXT: br i1 [[VIRTUAL_COND]], label [[LOOP_US:%.*]], label [[LOOP:%.*]]
+; CHECK: loop.us:
+; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], [[GUARDED_SPLIT_US:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[EL_PTR_US:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[IV_US]]
+; CHECK-NEXT: [[EL_US:%.*]] = load i8, ptr [[EL_PTR_US]], align 4
+; CHECK-NEXT: [[BOUND_CHECK_US:%.*]] = icmp slt i8 [[EL_US]], 0
+; CHECK-NEXT: br i1 [[BOUND_CHECK_US]], label [[COMMON_RET:%.*]], label [[GUARDED_SPLIT_US]]
+; CHECK: guarded.split.us:
+; CHECK-NEXT: [[EL_WIDE_US:%.*]] = zext i8 [[EL_US]] to i32
+; CHECK-NEXT: [[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL_WIDE_US]], [[LEN]]
+; CHECK-NEXT: [[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 [[EL_WIDE_US]]
+; CHECK-NEXT: store i32 [[IV_US]], ptr [[ARR_PTR_US]], align 4
+; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT: [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[COMMON_RET]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i8, ptr [[P]], i32 [[IV]]
+; CHECK-NEXT: [[EL:%.*]] = load i8, ptr [[EL_PTR]], align 4
+; CHECK-NEXT: [[BOUND_CHECK:%.*]] = icmp slt i8 [[EL]], 0
+; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[COMMON_RET]], label [[GUARDED_SPLIT:%.*]]
+; CHECK: guarded.split:
+; CHECK-NEXT: [[EL_WIDE:%.*]] = zext i8 [[EL]] to i32
+; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[EL_WIDE]], [[LEN]]
+; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !llvm.virtual.unswitching.disabled !0
+; CHECK: backedge:
+; CHECK-NEXT: [[ARR_PTR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[EL_WIDE]]
+; CHECK-NEXT: store i32 [[IV]], ptr [[ARR_PTR]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]]
+; CHECK: common.ret:
+; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[BACKEDGE]] ], [ 0, [[GUARDED_SPLIT_US]] ], [ -1, [[LOOP]] ], [ -1, [[LOOP_US]] ], [ -2, [[GUARDED_SPLIT]] ]
+; CHECK-NEXT: ret i32 [[COMMON_RET_OP]]
+;
+entry:
+  %len = load i32, ptr %len_p, align 4, !noundef !{}
+  br label %loop
+
+loop: ; preds = %backedge, %entry
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+  %el.ptr = getelementptr i8, ptr %p, i32 %iv
+  %el = load i8, ptr %el.ptr, align 4
+  %bound_check = icmp slt i8 %el, 0
+  br i1 %bound_check, label %bound_check_failed, label %guarded
+
+guarded: ; preds = %loop
+  %el.wide = zext i8 %el to i32
+  %range_check = icmp ult i32 %el.wide, %len
+  br i1 %range_check, label %backedge, label %range_check_failed
+
+backedge: ; preds = %guarded
+  %arr.ptr = getelementptr i32, ptr %arr, i32 %el.wide
+  store i32 %iv, ptr %arr.ptr, align 4
+  %iv.next = add i32 %iv, 1
+  %loop_cond = icmp slt i32 %iv.next, %n
+  br i1 %loop_cond, label %loop, label %exit
+
+exit: ; preds = %backedge
+  ret i32 0
+
+bound_check_failed: ; preds = %loop
+  ret i32 -1
+
+range_check_failed: ; preds = %guarded
+  ret i32 -2
+}
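For reference, a source-level sketch of the implication this unswitching exploits; it is not part of the patch, and the names (process, p, n, limit, arr, len) are hypothetical, chosen to mirror @test_01 above. Once the loop-invariant guard "limit <= len" holds (the role played by the "virtual.cond" the pass inserts in the preheader), the in-loop range check "el < len" is implied by the bound check "el < limit", so the cloned "fast" loop can drop it.

// Illustrative sketch only (assumed C++ analogue of @test_01, not patch code).
#include <cstddef>
#include <cstdint>

int process(const uint32_t *p, size_t n, uint32_t limit, uint32_t *arr,
            uint32_t len) {
  if (limit <= len) {
    // Unswitched "fast" copy: "el < len" follows from "el < limit".
    for (size_t i = 0; i < n; ++i) {
      uint32_t el = p[i];
      if (el >= limit)
        return -1; // bound_check_failed
      arr[el] = static_cast<uint32_t>(i);
    }
  } else {
    // Original loop: both checks remain.
    for (size_t i = 0; i < n; ++i) {
      uint32_t el = p[i];
      if (el >= limit)
        return -1; // bound_check_failed
      if (el >= len)
        return -2; // range_check_failed
      arr[el] = static_cast<uint32_t>(i);
    }
  }
  return 0;
}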