diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -288,14 +288,12 @@
                                   OptimizationRemarkEmitter *ORE,
                                   bool PreserveLCSSA, Loop **RemainderLoop) {
-  BasicBlock *Preheader = L->getLoopPreheader();
-  if (!Preheader) {
+  if (!L->getLoopPreheader()) {
     LLVM_DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
     return LoopUnrollResult::Unmodified;
   }
 
-  BasicBlock *LatchBlock = L->getLoopLatch();
-  if (!LatchBlock) {
+  if (!L->getLoopLatch()) {
     LLVM_DEBUG(dbgs() << "  Can't unroll; loop exit-block-insertion failed.\n");
     return LoopUnrollResult::Unmodified;
   }
@@ -306,37 +304,7 @@
     return LoopUnrollResult::Unmodified;
   }
 
-  // The current loop unroll pass can unroll loops that have
-  // (1) single latch; and
-  // (2a) latch is unconditional; or
-  // (2b) latch is conditional and is an exiting block
-  // FIXME: The implementation can be extended to work with more complicated
-  // cases, e.g. loops with multiple latches.
-  BasicBlock *Header = L->getHeader();
-  BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
-
-  // A conditional branch which exits the loop, which can be optimized to an
-  // unconditional branch in the unrolled loop in some cases.
-  BranchInst *ExitingBI = nullptr;
-  bool LatchIsExiting = L->isLoopExiting(LatchBlock);
-  if (LatchIsExiting)
-    ExitingBI = LatchBI;
-  else if (BasicBlock *ExitingBlock = L->getExitingBlock())
-    ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
-  if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
-    LLVM_DEBUG(
-        dbgs() << "Can't unroll; a conditional latch must exit the loop");
-    return LoopUnrollResult::Unmodified;
-  }
-  LLVM_DEBUG({
-    if (ExitingBI)
-      dbgs() << "  Exiting Block = " << ExitingBI->getParent()->getName()
-             << "\n";
-    else
-      dbgs() << "  No single exiting block\n";
-  });
-
-  if (Header->hasAddressTaken()) {
+  if (L->getHeader()->hasAddressTaken()) {
     // The loop-rotate pass can be helpful to avoid this in many cases.
     LLVM_DEBUG(
         dbgs() << "  Won't unroll loop: address of header block is taken.\n");
@@ -365,20 +333,6 @@
   // Are we eliminating the loop control altogether?
   bool CompletelyUnroll = ULO.Count == ULO.TripCount;
 
-  SmallVector<BasicBlock *, 4> ExitBlocks;
-  L->getExitBlocks(ExitBlocks);
-  std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks();
-
-  // Go through all exits of L and see if there are any phi-nodes there. We just
-  // conservatively assume that they're inserted to preserve LCSSA form, which
-  // means that complete unrolling might break this form. We need to either fix
-  // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
-  // now we just recompute LCSSA for the outer loop, but it should be possible
-  // to fix it in-place.
-  bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll &&
-                        any_of(ExitBlocks, [](const BasicBlock *BB) {
-                          return isa<PHINode>(BB->begin());
-                        });
 
   // We assume a run-time trip count if the compiler cannot
   // figure out the loop trip count and the unroll-runtime
@@ -403,12 +357,63 @@
       BasicBlock *ExitingBlock = L->getLoopLatch();
       assert(ExitingBlock && "Loop without exiting block?");
       assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?");
-      Preheader = L->getLoopPreheader();
       ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
       ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
     }
   }
 
+  // All these values should be taken only after peeling because they might have
+  // changed.
+  BasicBlock *Preheader = L->getLoopPreheader();
+  BasicBlock *Header = L->getHeader();
+  BasicBlock *LatchBlock = L->getLoopLatch();
+  SmallVector<BasicBlock *, 4> ExitBlocks;
+  L->getExitBlocks(ExitBlocks);
+  std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks();
+
+  // Go through all exits of L and see if there are any phi-nodes there. We just
+  // conservatively assume that they're inserted to preserve LCSSA form, which
+  // means that complete unrolling might break this form. We need to either fix
+  // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
+  // now we just recompute LCSSA for the outer loop, but it should be possible
+  // to fix it in-place.
+  bool NeedToFixLCSSA =
+      PreserveLCSSA && CompletelyUnroll &&
+      any_of(ExitBlocks,
+             [](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); });
+
+  // The current loop unroll pass can unroll loops that have
+  // (1) single latch; and
+  // (2a) latch is unconditional; or
+  // (2b) latch is conditional and is an exiting block
+  // FIXME: The implementation can be extended to work with more complicated
+  // cases, e.g. loops with multiple latches.
+  BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
+
+  // A conditional branch which exits the loop, which can be optimized to an
+  // unconditional branch in the unrolled loop in some cases.
+  BranchInst *ExitingBI = nullptr;
+  bool LatchIsExiting = L->isLoopExiting(LatchBlock);
+  if (LatchIsExiting)
+    ExitingBI = LatchBI;
+  else if (BasicBlock *ExitingBlock = L->getExitingBlock())
+    ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+  if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
+    // If the peeling guard is changed, this assert may be relaxed or even
+    // deleted.
+    assert(!Peeled && "Peeling guard changed!");
+    LLVM_DEBUG(
+        dbgs() << "Can't unroll; a conditional latch must exit the loop");
+    return LoopUnrollResult::Unmodified;
+  }
+  LLVM_DEBUG({
+    if (ExitingBI)
+      dbgs() << "  Exiting Block = " << ExitingBI->getParent()->getName()
+             << "\n";
+    else
+      dbgs() << "  No single exiting block\n";
+  });
+
   // Loops containing convergent instructions must have a count that divides
   // their TripMultiple.
   LLVM_DEBUG(
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-after-peel.ll b/llvm/test/Transforms/LoopUnroll/unroll-after-peel.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/unroll-after-peel.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=loop-unroll -S | FileCheck %s
+
+define i64 @hoge(i1 %c) {
+; CHECK-LABEL: @hoge(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB1_PEEL_BEGIN:%.*]]
+; CHECK:       bb1.peel.begin:
+; CHECK-NEXT:    br label [[BB1_PEEL:%.*]]
+; CHECK:       bb1.peel:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB2_PEEL:%.*]], label [[BB4:%.*]]
+; CHECK:       bb2.peel:
+; CHECK-NEXT:    [[TMP3_PEEL:%.*]] = icmp slt i32 0, 9
+; CHECK-NEXT:    br i1 [[TMP3_PEEL]], label [[BB1_PEEL_NEXT:%.*]], label [[BB4]]
+; CHECK:       bb1.peel.next:
+; CHECK-NEXT:    br label [[BB1_PEEL_NEXT1:%.*]]
+; CHECK:       bb1.peel.next1:
+; CHECK-NEXT:    br label [[BB_PEEL_NEWPH:%.*]]
+; CHECK:       bb.peel.newph:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br i1 [[C]], label [[BB1]], label [[BB4_LOOPEXIT:%.*]], [[LOOP0:!llvm.loop !.*]]
+; CHECK:       bb4.loopexit:
+; CHECK-NEXT:    [[TMP5_PH:%.*]] = phi i32 [ 8, [[BB1]] ]
+; CHECK-NEXT:    br label [[BB4]]
+; CHECK:       bb4:
+; CHECK-NEXT:    [[TMP5:%.*]] = phi i32 [ 0, [[BB1_PEEL]] ], [ 8, [[BB2_PEEL]] ], [ [[TMP5_PH]], [[BB4_LOOPEXIT]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 (...) @llvm.experimental.deoptimize.i64(i32 10) [ "deopt"() ]
+; CHECK-NEXT:    ret i64 [[TMP6]]
+;
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb2, %bb
+  %tmp = phi i32 [ 8, %bb2 ], [ 0, %bb ]
+  br i1 %c, label %bb2, label %bb4
+
+bb2:                                              ; preds = %bb1
+  %tmp3 = icmp slt i32 %tmp, 9
+  br i1 %tmp3, label %bb1, label %bb4
+
+bb4:                                              ; preds = %bb2, %bb1
+  %tmp5 = phi i32 [ 8, %bb2 ], [ %tmp, %bb1 ]
+  %tmp6 = call i64 (...) @llvm.experimental.deoptimize.i64(i32 10) [ "deopt"() ]
+  ret i64 %tmp6
+}
+
+declare i64 @llvm.experimental.deoptimize.i64(...)
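
A minimal C++ sketch of the ordering rule this patch enforces, for reference
only. It is not code from the tree: cacheThenPeel is a hypothetical helper, and
the peelLoop signature is assumed to match the call LoopUnroll.cpp makes around
the time of this patch (declared in llvm/Transforms/Utils/UnrollLoop.h). The
point is that peeling can fold the loop's conditional latch branch and insert a
new preheader, so any BasicBlock pointer cached before peelLoop() may be stale.

// Hypothetical helper (not part of the patch) illustrating the fixed ordering:
// query loop-structure blocks only after peeling has run.
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/UnrollLoop.h" // peelLoop() (assumed location)

using namespace llvm;

static void cacheThenPeel(Loop *L, unsigned PeelCount, LoopInfo *LI,
                          ScalarEvolution *SE, DominatorTree *DT,
                          AssumptionCache *AC) {
  // Buggy order (what UnrollLoop effectively did before this patch):
  //   BasicBlock *LatchBlock = L->getLoopLatch(); // cached too early
  //   peelLoop(...);                              // may change the latch
  //   ... use LatchBlock ...                      // now a stale block
  bool Peeled = false;
  if (PeelCount)
    Peeled = peelLoop(L, PeelCount, LI, SE, DT, AC, /*PreserveLCSSA=*/true);

  // Fixed order: take every structural block fresh, after peeling.
  BasicBlock *Preheader = L->getLoopPreheader();
  BasicBlock *Header = L->getHeader();
  BasicBlock *LatchBlock = L->getLoopLatch();
  (void)Peeled; (void)Preheader; (void)Header; (void)LatchBlock;
}

The test above shows why the re-query matters: before peeling, the latch is bb2
with the conditional branch on %tmp3; after one iteration is peeled, %tmp is
always 8, icmp slt i32 8, 9 folds to true, bb2 drops out of the loop, and bb1
becomes the latch (see the CHECK lines for bb1). The new
assert(!Peeled && "Peeling guard changed!") documents that the peeling
profitability guard is expected to only peel loops whose latch remains a
conditional exiting branch afterwards.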