Index: llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -77,6 +77,8 @@ STATISTIC( NumCostMultiplierSkipped, "Number of unswitch candidates that had their cost multiplier skipped"); +STATISTIC(NumVirtualBranchMaterialized, + "Number of virtual branches by invariant conditions materialized"); static cl::opt EnableNonTrivialUnswitch( "enable-nontrivial-unswitch", cl::init(false), cl::Hidden, @@ -117,14 +119,43 @@ cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation.")); +static cl::opt UnswitchVirtualInvariants( + "unswitch-virtual-invariants", cl::Hidden, + cl::desc("Whether we should insert virtual invariants and unswitch them."), + cl::init(false)); + namespace { +struct CompareDesc { + BranchInst *Term; + Value *Invariant; + BasicBlock *InLoopSucc; + + CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc) + : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {} +}; + +struct VirtualInvariant { + ICmpInst::Predicate Pred; + Value *LHS; + Value *RHS; + BasicBlock *InLoopSucc; + + VirtualInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS, + BasicBlock *InLoopSucc) + : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {} +}; + struct NonTrivialUnswitchCandidate { Instruction *TI = nullptr; TinyPtrVector Invariants; Optional Cost; + Optional Virtual; NonTrivialUnswitchCandidate(Instruction *TI, ArrayRef Invariants, - Optional Cost = None) - : TI(TI), Invariants(Invariants), Cost(Cost) {}; + Optional Cost = None, + Optional Virtual = None) + : TI(TI), Invariants(Invariants), Cost(Cost), Virtual(Virtual) {}; + + bool isVirtual() const { return Virtual.has_value(); } }; } // end anonymous namespace. @@ -2827,6 +2858,267 @@ return !UnswitchCandidates.empty(); } +/// Tries to canonicalize condition described by: +/// +/// br (LHS pred RHS), label IfTrue, label IfFalse +/// +/// into its equivalent where `Pred` is something that we support for virtual +/// invariants (so far it is limited to ult), LHS in canonicalized form is +/// non-invariant and RHS is an invariant. +static bool matchPredicate(ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, + BasicBlock *&IfTrue, BasicBlock *&IfFalse, Loop &L) { + // TODO: So far we want true branch to always stay in loop and false branch + // leave the loop. Both are not necessary, but need some effort to support it. + if (!L.contains(IfTrue)) { + Pred = ICmpInst::getInversePredicate(Pred); + std::swap(IfTrue, IfFalse); + } else if (L.contains(IfFalse)) + return false; + + // Move loop-invariant argument to RHS position. + if (L.isLoopInvariant(LHS)) { + Pred = ICmpInst::getSwappedPredicate(Pred); + std::swap(LHS, RHS); + } + + if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS)) + return false; + + if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) { + // Turn "x >=s 0" into "x getContext(), + APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth())); + } + + return true; +} + +/// Materialize given virtual candidate into IR. The virtual loop-invariant +/// condition implies the original loop-variant branch condition, so the +/// materialization turns +/// +/// loop_block: +/// ... +/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc +/// +/// into +/// +/// preheader: +/// %invariant_cond = LHS pred RHS +/// ... +/// loop_block: +/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck +/// OriginalCheck: +/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc +/// ... +static NonTrivialUnswitchCandidate +materializeVirtualCandidate(NonTrivialUnswitchCandidate Virtual, Loop &L, + DominatorTree &DT, LoopInfo &LI, + AssumptionCache &AC, MemorySSAUpdater *MSSAU) { + assert(Virtual.isVirtual()); + BasicBlock *Preheader = L.getLoopPreheader(); + assert(Preheader && "Loop is not in simplified form?"); + + auto Pred = Virtual.Virtual->Pred; + auto *LHS = Virtual.Virtual->LHS; + auto *RHS = Virtual.Virtual->RHS; + auto *InLoopSucc = Virtual.Virtual->InLoopSucc; + auto *TI = cast(Virtual.TI); + auto *BB = Virtual.TI->getParent(); + assert(InLoopSucc == TI->getSuccessor(0)); + auto *OutOfLoopSucc = TI->getSuccessor(1); + // FIXME: Remove this once limitation on successors is lifted. + assert(L.contains(InLoopSucc) && "Not supported yet!"); + assert(!L.contains(OutOfLoopSucc) && "Not supported yet!"); + auto &Ctx = BB->getContext(); + + IRBuilder<> Builder(Preheader->getTerminator()); + assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!"); + if (LHS->getType() != RHS->getType()) { + if (LHS->getType()->getIntegerBitWidth() < + RHS->getType()->getIntegerBitWidth()) + LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide"); + else + RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide"); + } + + auto *VirtualCond = Builder.CreateICmp(Pred, LHS, RHS, "virtual.cond"); + auto *OldCond = TI->getCondition(); + + BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check", + BB->getParent(), InLoopSucc); + Builder.SetInsertPoint(TI); + auto *InvariantBr = Builder.CreateCondBr(VirtualCond, InLoopSucc, CheckBlock); + + Builder.SetInsertPoint(CheckBlock); + auto *NewTerm = Builder.CreateCondBr(OldCond, InLoopSucc, OutOfLoopSucc); + + // Preserve valid metadata. + if (auto *MDNode = TI->getMetadata(LLVMContext::MD_make_implicit)) + NewTerm->setMetadata(LLVMContext::MD_make_implicit, MDNode); + if (auto *MDNode = TI->getMetadata(LLVMContext::MD_prof)) + NewTerm->setMetadata(LLVMContext::MD_prof, MDNode); + TI->eraseFromParent(); + // Prevent infinite unswitching. + NewTerm->setMetadata("llvm.virtual.unswitching.disabled", + MDNode::get(BB->getContext(), {})); + + // Fixup phis. + for (auto &I : *InLoopSucc) { + auto *PN = dyn_cast(&I); + if (!PN) + break; + auto *Inc = PN->getIncomingValueForBlock(BB); + PN->addIncoming(Inc, CheckBlock); + } + OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock); + + SmallVector DTUpdates = { + { DominatorTree::Insert, BB, CheckBlock }, + { DominatorTree::Insert, CheckBlock, InLoopSucc }, + { DominatorTree::Insert, CheckBlock, OutOfLoopSucc }, + { DominatorTree::Delete, BB, OutOfLoopSucc } + }; + + DT.applyUpdates(DTUpdates); + if (MSSAU) + MSSAU->applyUpdates(DTUpdates, DT); + L.addBasicBlockToLoop(CheckBlock, LI); + +#ifndef NDEBUG + DT.verify(); + LI.verify(DT); + if (MSSAU) + MSSAU->getMemorySSA()->verifyMemorySSA(); +#endif + + // TODO: In fact, cost of unswitching a new invariant candidate is *slightly* + // higher because we have just inserted a new block. Need to think how to + // adjust the cost of virtual candidates when it was first computed. + LLVM_DEBUG(dbgs() << "Materialized virtual loop-invariant branch " + << *InvariantBr << " and considering it for unswitching."); + ++NumVirtualBranchMaterialized; + return NonTrivialUnswitchCandidate(InvariantBr, { VirtualCond }, + Virtual.Cost); +} + +/// Given chain of loop branch conditions looking like: +/// br (Variant < Invariant1) +/// br (Variant < Invariant2) +/// br (Variant < Invariant3) +/// ... +/// collect set of virtual invariant conditions on which we want to unswitch, +/// which are: +/// Invariant1 <= Invariant2 +/// Invariant2 <= Invariant3 +/// ... +static bool insertVirtualCandidates( + SmallVectorImpl &UnswitchCandidates, Loop &L, + ICmpInst::Predicate Pred, ArrayRef Compares, + const DominatorTree &DT) { + + assert(ICmpInst::isRelational(Pred)); + assert(ICmpInst::isStrictPredicate(Pred)); + if (Compares.size() < 2) + return false; + ICmpInst::Predicate NonStrictPred = ICmpInst::getNonStrictPredicate(Pred); + for (auto Prev = Compares.begin(), Next = Compares.begin() + 1; + Next != Compares.end(); ++Prev, ++Next) { + Value *LHS = Next->Invariant; + Value *RHS = Prev->Invariant; + BasicBlock *InLoopSucc = Prev->InLoopSucc; + VirtualInvariant VI(NonStrictPred, LHS, RHS, InLoopSucc); + NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS }, None, VI); + UnswitchCandidates.push_back(std::move(Candidate)); + } + return true; +} + +/// Collect unswitch candidates by invariant conditions that are not immediately +/// present in the loop. However, they can be injected into the code if we +/// decide it's profitable. +/// An example of such conditions is following: +/// +/// for (...) { +/// x = load ... +/// if (! x +/// } +/// +/// We can unswitch by condition "C1 <=u C2". If that is true, then "x +/// } +/// +/// When inserting invariant conditions, we will just need to extend them to the +/// wider type. +static bool collectVirtualUnswitchCandidates( + SmallVectorImpl &UnswitchCandidates, + IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, + const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, + const MemorySSAUpdater *MSSAU) { + if (!UnswitchVirtualInvariants) + return false; + + if (!DT.isReachableFromEntry(L.getHeader())) + return false; + auto *Latch = L.getLoopLatch(); + // Need to have a single latch and a preheader. + if (!Latch) + return false; + assert(L.getLoopPreheader() && "Must have a preheader!"); + + DenseMap > CandidatesULT; + for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock()); + DTN = DTN->getIDom()) { + ICmpInst::Predicate Pred; + Value *LHS = nullptr, *RHS = nullptr; + BasicBlock *IfTrue = nullptr, *IfFalse = nullptr; + auto *BB = DTN->getBlock(); + auto *Term = BB->getTerminator(); + if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), + m_BasicBlock(IfTrue), m_BasicBlock(IfFalse)))) + continue; + // Skip branches that have already been unswithed this way. + if (Term->hasMetadata("llvm.virtual.unswitching.disabled")) + continue; + if (!matchPredicate(Pred, LHS, RHS, IfTrue, IfFalse, L)) + continue; + // TODO: Support other predicates. + if (Pred != ICmpInst::ICMP_ULT) + continue; + if (!L.contains(IfTrue) || L.contains(IfFalse)) + continue; + // FIXME: For some reason this causes problems with MSSA updates, need to + // investigate why. So far, just don't unswitch latch. + if (L.getHeader() == IfTrue) + continue; + // Strip ZEXT for unsigned predicate. + // TODO: once signed predicates are supported, also strip SEXT. + CompareDesc Desc(cast(Term), RHS, IfTrue); + while (auto *Zext = dyn_cast(LHS)) + LHS = Zext->getOperand(0); + CandidatesULT[LHS].push_back(Desc); + } + + bool Found = false; + for (auto &It : CandidatesULT) + Found |= insertVirtualCandidates(UnswitchCandidates, L, ICmpInst::ICMP_ULT, + It.second, DT); + return Found; +} + static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) { if (!L.isSafeToClone()) return false; @@ -2986,10 +3278,11 @@ Instruction &TI = *Candidate.TI; ArrayRef Invariants = Candidate.Invariants; BranchInst *BI = dyn_cast(&TI); - InstructionCost CandidateCost = ComputeUnswitchedCost( - TI, /*FullUnswitch*/ !BI || - (Invariants.size() == 1 && - Invariants[0] == skipTrivialSelect(BI->getCondition()))); + bool FullUnswitch = + !BI || Candidate.isVirtual() || + (Invariants.size() == 1 && + Invariants[0] == skipTrivialSelect(BI->getCondition())); + InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch); // Calculate cost multiplier which is a tool to limit potentially // exponential behavior of loop-unswitch. if (EnableUnswitchCostMultiplier) { @@ -3028,8 +3321,11 @@ IVConditionInfo PartialIVInfo; Instruction *PartialIVCondBranch = nullptr; // If we didn't find any candidates, we're done. - if (!collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo, - PartialIVCondBranch, L, LI, AA, MSSAU)) + collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo, + PartialIVCondBranch, L, LI, AA, MSSAU); + collectVirtualUnswitchCandidates(UnswitchCandidates, PartialIVInfo, + PartialIVCondBranch, L, DT, LI, AA, MSSAU); + if (UnswitchCandidates.empty()) return false; LLVM_DEBUG( @@ -3038,7 +3334,6 @@ NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate( UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo); - assert(Best.TI && "Failed to find loop unswitch candidate"); assert(Best.Cost && "Failed to compute cost"); @@ -3048,6 +3343,10 @@ return false; } + if (Best.isVirtual()) + Best = materializeVirtualCandidate(Best, L, DT, LI, AC, MSSAU); + assert(!Best.isVirtual() && "Should be materialized by now!"); + if (Best.TI != PartialIVCondBranch) PartialIVInfo.InstToDuplicate.clear(); Index: llvm/test/Transforms/SimpleLoopUnswitch/insert-invariant-conditions.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SimpleLoopUnswitch/insert-invariant-conditions.ll @@ -0,0 +1,286 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -enable-nontrivial-unswitch -unswitch-virtual-invariants -S -passes="loop(simple-loop-unswitch),simplifycfg" | FileCheck %s +; RUN: opt < %s -enable-nontrivial-unswitch -unswitch-virtual-invariants -S -passes="loop-mssa(simple-loop-unswitch),simplifycfg" -verify-memoryssa | FileCheck %s + +define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr noundef %arr, ptr noundef %len_p) { +; CHECK-LABEL: @test_01( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[LEN_P:%.*]], align 4, !noundef !0 +; CHECK-NEXT: [[VIRTUAL_COND:%.*]] = icmp ule i32 [[LIMIT:%.*]], [[LEN]] +; CHECK-NEXT: br i1 [[VIRTUAL_COND]], label [[LOOP_US:%.*]], label [[LOOP:%.*]] +; CHECK: loop.us: +; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[EL_PTR_US:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[IV_US]] +; CHECK-NEXT: [[EL_US:%.*]] = load i32, ptr [[EL_PTR_US]], align 4 +; CHECK-NEXT: [[BOUND_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[LIMIT]] +; CHECK-NEXT: br i1 [[BOUND_CHECK_US]], label [[GUARDED_US]], label [[COMMON_RET:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK: guarded.us: +; CHECK-NEXT: [[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[LEN]] +; CHECK-NEXT: [[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 [[EL_US]] +; CHECK-NEXT: store i32 [[IV_US]], ptr [[ARR_PTR_US]], align 4 +; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1 +; CHECK-NEXT: [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]] +; CHECK-NEXT: br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[COMMON_RET]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; CHECK-NEXT: [[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4 +; CHECK-NEXT: [[BOUND_CHECK:%.*]] = icmp ult i32 [[EL]], [[LIMIT]] +; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[GUARDED:%.*]], label [[COMMON_RET]], !prof [[PROF1]] +; CHECK: guarded: +; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[EL]], [[LEN]] +; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !prof [[PROF1]], !llvm.virtual.unswitching.disabled !0 +; CHECK: backedge: +; CHECK-NEXT: [[ARR_PTR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[EL]] +; CHECK-NEXT: store i32 [[IV]], ptr [[ARR_PTR]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]] +; CHECK: common.ret: +; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[BACKEDGE]] ], [ 0, [[GUARDED_US]] ], [ -1, [[LOOP]] ], [ -1, [[LOOP_US]] ], [ -2, [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] +; +entry: + %len = load i32, ptr %len_p, align 4, !noundef !{} + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] + %el.ptr = getelementptr i32, ptr %p, i32 %iv + %el = load i32, ptr %el.ptr, align 4 + %bound_check = icmp ult i32 %el, %limit + br i1 %bound_check, label %guarded, label %bound_check_failed, !prof !{!"branch_weights", i32 100, i32 1} + +guarded: ; preds = %loop + %range_check = icmp ult i32 %el, %len + br i1 %range_check, label %backedge, label %range_check_failed, !prof !{!"branch_weights", i32 100, i32 1} + +backedge: ; preds = %guarded + %arr.ptr = getelementptr i32, ptr %arr, i32 %el + store i32 %iv, ptr %arr.ptr, align 4 + %iv.next = add i32 %iv, 1 + %loop_cond = icmp slt i32 %iv.next, %n + br i1 %loop_cond, label %loop, label %exit + +exit: ; preds = %backedge + ret i32 0 + +bound_check_failed: ; preds = %loop + ret i32 -1 + +range_check_failed: ; preds = %guarded + ret i32 -2 +} + +define i32 @test_02(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr noundef %arr, ptr noundef %len_p) { +; CHECK-LABEL: @test_02( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[LEN_P:%.*]], align 4, !noundef !0 +; CHECK-NEXT: [[VIRTUAL_COND:%.*]] = icmp ule i32 -2147483648, [[LEN]] +; CHECK-NEXT: br i1 [[VIRTUAL_COND]], label [[LOOP_US:%.*]], label [[LOOP:%.*]] +; CHECK: loop.us: +; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[EL_PTR_US:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[IV_US]] +; CHECK-NEXT: [[EL_US:%.*]] = load i32, ptr [[EL_PTR_US]], align 4 +; CHECK-NEXT: [[BOUND_CHECK_US:%.*]] = icmp sge i32 [[EL_US]], 0 +; CHECK-NEXT: br i1 [[BOUND_CHECK_US]], label [[GUARDED_US]], label [[COMMON_RET:%.*]], !prof [[PROF1]] +; CHECK: guarded.us: +; CHECK-NEXT: [[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[LEN]] +; CHECK-NEXT: [[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 [[EL_US]] +; CHECK-NEXT: store i32 [[IV_US]], ptr [[ARR_PTR_US]], align 4 +; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1 +; CHECK-NEXT: [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]] +; CHECK-NEXT: br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[COMMON_RET]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; CHECK-NEXT: [[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4 +; CHECK-NEXT: [[BOUND_CHECK:%.*]] = icmp sge i32 [[EL]], 0 +; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[GUARDED:%.*]], label [[COMMON_RET]], !prof [[PROF1]] +; CHECK: guarded: +; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[EL]], [[LEN]] +; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !prof [[PROF1]], !llvm.virtual.unswitching.disabled !0 +; CHECK: backedge: +; CHECK-NEXT: [[ARR_PTR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[EL]] +; CHECK-NEXT: store i32 [[IV]], ptr [[ARR_PTR]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]] +; CHECK: common.ret: +; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[BACKEDGE]] ], [ 0, [[GUARDED_US]] ], [ -1, [[LOOP]] ], [ -1, [[LOOP_US]] ], [ -2, [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] +; +entry: + %len = load i32, ptr %len_p, align 4, !noundef !{} + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] + %el.ptr = getelementptr i32, ptr %p, i32 %iv + %el = load i32, ptr %el.ptr, align 4 + %bound_check = icmp sge i32 %el, 0 + br i1 %bound_check, label %guarded, label %bound_check_failed, !prof !{!"branch_weights", i32 100, i32 1} + +guarded: ; preds = %loop + %range_check = icmp ult i32 %el, %len + br i1 %range_check, label %backedge, label %range_check_failed, !prof !{!"branch_weights", i32 100, i32 1} + +backedge: ; preds = %guarded + %arr.ptr = getelementptr i32, ptr %arr, i32 %el + store i32 %iv, ptr %arr.ptr, align 4 + %iv.next = add i32 %iv, 1 + %loop_cond = icmp slt i32 %iv.next, %n + br i1 %loop_cond, label %loop, label %exit + +exit: ; preds = %backedge + ret i32 0 + +bound_check_failed: ; preds = %loop + ret i32 -1 + +range_check_failed: ; preds = %guarded + ret i32 -2 +} + +define i32 @test_03(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr noundef %arr, ptr noundef %len_p) { +; CHECK-LABEL: @test_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[LEN_P:%.*]], align 4, !noundef !0 +; CHECK-NEXT: [[VIRTUAL_COND:%.*]] = icmp ule i32 -2147483648, [[LEN]] +; CHECK-NEXT: br i1 [[VIRTUAL_COND]], label [[LOOP_US:%.*]], label [[LOOP:%.*]] +; CHECK: loop.us: +; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[EL_PTR_US:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[IV_US]] +; CHECK-NEXT: [[EL_US:%.*]] = load i32, ptr [[EL_PTR_US]], align 4 +; CHECK-NEXT: [[BOUND_CHECK_US:%.*]] = icmp slt i32 [[EL_US]], 0 +; CHECK-NEXT: br i1 [[BOUND_CHECK_US]], label [[COMMON_RET:%.*]], label [[GUARDED_US]], !prof [[PROF1]] +; CHECK: guarded.us: +; CHECK-NEXT: [[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[LEN]] +; CHECK-NEXT: [[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 [[EL_US]] +; CHECK-NEXT: store i32 [[IV_US]], ptr [[ARR_PTR_US]], align 4 +; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1 +; CHECK-NEXT: [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]] +; CHECK-NEXT: br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[COMMON_RET]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; CHECK-NEXT: [[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4 +; CHECK-NEXT: [[BOUND_CHECK:%.*]] = icmp slt i32 [[EL]], 0 +; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[COMMON_RET]], label [[GUARDED:%.*]], !prof [[PROF1]] +; CHECK: guarded: +; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[EL]], [[LEN]] +; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !prof [[PROF1]], !llvm.virtual.unswitching.disabled !0 +; CHECK: backedge: +; CHECK-NEXT: [[ARR_PTR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[EL]] +; CHECK-NEXT: store i32 [[IV]], ptr [[ARR_PTR]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]] +; CHECK: common.ret: +; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[BACKEDGE]] ], [ 0, [[GUARDED_US]] ], [ -1, [[LOOP]] ], [ -1, [[LOOP_US]] ], [ -2, [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] +; +entry: + %len = load i32, ptr %len_p, align 4, !noundef !{} + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] + %el.ptr = getelementptr i32, ptr %p, i32 %iv + %el = load i32, ptr %el.ptr, align 4 + %bound_check = icmp slt i32 %el, 0 + br i1 %bound_check, label %bound_check_failed, label %guarded, !prof !{!"branch_weights", i32 100, i32 1} + +guarded: ; preds = %loop + %range_check = icmp ult i32 %el, %len + br i1 %range_check, label %backedge, label %range_check_failed, !prof !{!"branch_weights", i32 100, i32 1} + +backedge: ; preds = %guarded + %arr.ptr = getelementptr i32, ptr %arr, i32 %el + store i32 %iv, ptr %arr.ptr, align 4 + %iv.next = add i32 %iv, 1 + %loop_cond = icmp slt i32 %iv.next, %n + br i1 %loop_cond, label %loop, label %exit + +exit: ; preds = %backedge + ret i32 0 + +bound_check_failed: ; preds = %loop + ret i32 -1 + +range_check_failed: ; preds = %guarded + ret i32 -2 +} + +define i32 @test_04(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr noundef %arr, ptr noundef %len_p) { +; CHECK-LABEL: @test_04( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, ptr [[LEN_P:%.*]], align 4, !noundef !0 +; CHECK-NEXT: [[VIRTUAL_COND:%.*]] = icmp ule i32 128, [[LEN]] +; CHECK-NEXT: br i1 [[VIRTUAL_COND]], label [[LOOP_US:%.*]], label [[LOOP:%.*]] +; CHECK: loop.us: +; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[EL_PTR_US:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[IV_US]] +; CHECK-NEXT: [[EL_US:%.*]] = load i8, ptr [[EL_PTR_US]], align 4 +; CHECK-NEXT: [[BOUND_CHECK_US:%.*]] = icmp slt i8 [[EL_US]], 0 +; CHECK-NEXT: br i1 [[BOUND_CHECK_US]], label [[COMMON_RET:%.*]], label [[GUARDED_US]], !prof [[PROF1]] +; CHECK: guarded.us: +; CHECK-NEXT: [[EL_WIDE_US:%.*]] = zext i8 [[EL_US]] to i32 +; CHECK-NEXT: [[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL_WIDE_US]], [[LEN]] +; CHECK-NEXT: [[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 [[EL_WIDE_US]] +; CHECK-NEXT: store i32 [[IV_US]], ptr [[ARR_PTR_US]], align 4 +; CHECK-NEXT: [[IV_NEXT_US]] = add i32 [[IV_US]], 1 +; CHECK-NEXT: [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]] +; CHECK-NEXT: br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[COMMON_RET]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i8, ptr [[P]], i32 [[IV]] +; CHECK-NEXT: [[EL:%.*]] = load i8, ptr [[EL_PTR]], align 4 +; CHECK-NEXT: [[BOUND_CHECK:%.*]] = icmp slt i8 [[EL]], 0 +; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[COMMON_RET]], label [[GUARDED:%.*]], !prof [[PROF1]] +; CHECK: guarded: +; CHECK-NEXT: [[EL_WIDE:%.*]] = zext i8 [[EL]] to i32 +; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[EL_WIDE]], [[LEN]] +; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !prof [[PROF1]], !llvm.virtual.unswitching.disabled !0 +; CHECK: backedge: +; CHECK-NEXT: [[ARR_PTR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[EL_WIDE]] +; CHECK-NEXT: store i32 [[IV]], ptr [[ARR_PTR]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]] +; CHECK: common.ret: +; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[BACKEDGE]] ], [ 0, [[GUARDED_US]] ], [ -1, [[LOOP]] ], [ -1, [[LOOP_US]] ], [ -2, [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] +; +entry: + %len = load i32, ptr %len_p, align 4, !noundef !{} + br label %loop + +loop: ; preds = %backedge, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] + %el.ptr = getelementptr i8, ptr %p, i32 %iv + %el = load i8, ptr %el.ptr, align 4 + %bound_check = icmp slt i8 %el, 0 + br i1 %bound_check, label %bound_check_failed, label %guarded, !prof !{!"branch_weights", i32 100, i32 1} + +guarded: ; preds = %loop + %el.wide = zext i8 %el to i32 + %range_check = icmp ult i32 %el.wide, %len + br i1 %range_check, label %backedge, label %range_check_failed, !prof !{!"branch_weights", i32 100, i32 1} + +backedge: ; preds = %guarded + %arr.ptr = getelementptr i32, ptr %arr, i32 %el.wide + store i32 %iv, ptr %arr.ptr, align 4 + %iv.next = add i32 %iv, 1 + %loop_cond = icmp slt i32 %iv.next, %n + br i1 %loop_cond, label %loop, label %exit + +exit: ; preds = %backedge + ret i32 0 + +bound_check_failed: ; preds = %loop + ret i32 -1 + +range_check_failed: ; preds = %guarded + ret i32 -2 +}