Index: include/llvm/Transforms/Utils/UnrollLoop.h =================================================================== --- include/llvm/Transforms/Utils/UnrollLoop.h +++ include/llvm/Transforms/Utils/UnrollLoop.h @@ -50,7 +50,7 @@ TargetTransformInfo::UnrollingPreferences &UP); bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, - DominatorTree *DT, bool PreserveLCSSA); + DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA); MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name); } Index: lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- lib/Transforms/Utils/LoopUnroll.cpp +++ lib/Transforms/Utils/LoopUnroll.cpp @@ -51,6 +51,10 @@ cl::desc("Allow runtime unrolled loops to be unrolled " "with epilog instead of prolog.")); +static cl::opt +UnrollVerifyDomtree("unroll-verify-domtree", cl::init(false), cl::Hidden, + cl::desc("Verify domtree after unrolling")); + /// Convert the instruction operands from referencing the current values into /// those specified by VMap. static inline void remapInstruction(Instruction *I, @@ -297,7 +301,7 @@ "and peeling for the same loop"); if (PeelCount) - peelLoop(L, PeelCount, LI, SE, DT, PreserveLCSSA); + peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA); // Loops containing convergent instructions must have a count that divides // their TripMultiple. @@ -596,14 +600,11 @@ Term->eraseFromParent(); } } + // Update dominators of blocks we might reach through exits. // Immediate dominator of such block might change, because we add more // routes which can lead to the exit: we can now reach it from the copied - // iterations too. Thus, the new idom of the block will be the nearest - // common dominator of the previous idom and common dominator of all copies of - // the previous idom. This is equivalent to the nearest common dominator of - // the previous idom and the first latch, which dominates all copies of the - // previous idom. + // iterations too. if (DT && Count > 1) { for (auto *BB : OriginalLoopBlocks) { auto *BBDomNode = DT->getNode(BB); @@ -613,12 +614,36 @@ if (!L->contains(ChildBB)) ChildrenToUpdate.push_back(ChildBB); } - BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latches[0]); + BasicBlock *NewIDom; + if (BB == LatchBlock) { + // The latch is special because we emit unconditional branches in + // some cases. Since the latch is always at the bottom of the loop, + // the new dominator must also be a latch. Specifically, the dominator + // is the first latch which ends in a conditional branch, or the last + // latch if there is no such latch. + NewIDom = Latches.back(); + for (BasicBlock *IterLatch : Latches) { + TerminatorInst *Term = IterLatch->getTerminator(); + if (isa(Term) && cast(Term)->isConditional()) { + NewIDom = IterLatch; + break; + } + } + } else { + // The new idom of the block will be the nearest common dominator + // of all copies of the previous idom. This is equivalent to the + // nearest common dominator of the previous idom and the first latch, + // which dominates all copies of the previous idom. + NewIDom = DT->findNearestCommonDominator(BB, LatchBlock); + } for (auto *ChildBB : ChildrenToUpdate) DT->changeImmediateDominator(ChildBB, NewIDom); } } + if (DT && UnrollVerifyDomtree) + DT->verifyDomTree(); + // Merge adjacent basic blocks, if possible. SmallPtrSet ForgottenLoops; for (BasicBlock *Latch : Latches) { @@ -636,13 +661,6 @@ } } - // FIXME: We only preserve DT info for complete unrolling now. Incrementally - // updating domtree after partial loop unrolling should also be easy. - if (DT && !CompletelyUnroll) - DT->recalculate(*L->getHeader()->getParent()); - else if (DT) - DEBUG(DT->verifyDomTree()); - // Simplify any new induction variables in the partially unrolled loop. if (SE && !CompletelyUnroll && Count > 1) { SmallVector DeadInsts; @@ -697,8 +715,6 @@ // at least one layer outside of the loop that was unrolled so that any // changes to the parent loop exposed by the unrolling are considered. if (DT) { - if (!OuterL && !CompletelyUnroll) - OuterL = L; if (OuterL) { // OuterL includes all loops for which we can break loop-simplify, so // it's sufficient to simplify only it (it'll recursively simplify inner Index: lib/Transforms/Utils/LoopUnrollPeel.cpp =================================================================== --- lib/Transforms/Utils/LoopUnrollPeel.cpp +++ lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -28,6 +28,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" #include @@ -257,7 +258,7 @@ /// optimizations. bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, - bool PreserveLCSSA) { + AssumptionCache *AC, bool PreserveLCSSA) { if (!canPeel(L)) return false; @@ -395,10 +396,20 @@ LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); } + // FIXME: Incrementally update domtree. + DT->recalculate(*L->getHeader()->getParent()); + // If the loop is nested, we changed the parent loop, update SE. - if (Loop *ParentLoop = L->getParentLoop()) + if (Loop *ParentLoop = L->getParentLoop()) { SE->forgetLoop(ParentLoop); + // FIXME: Incrementally update loop-simplify + simplifyLoop(ParentLoop, DT, LI, SE, AC, PreserveLCSSA); + } else { + // FIXME: Incrementally update loop-simplify + simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA); + } + NumPeeled++; return true; Index: lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -146,6 +146,8 @@ // Add the branch to the exit block (around the unrolled loop) B.CreateCondBr(BrLoopExit, Exit, NewPreHeader); InsertPt->eraseFromParent(); + if (DT) + DT->changeImmediateDominator(Exit, PrologExit); } /// Connect the unrolling epilog code to the original loop. @@ -260,13 +262,20 @@ IRBuilder<> B(InsertPt); Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod"); assert(Exit && "Loop must have a single exit block only"); - // Split the exit to maintain loop canonicalization guarantees + // Split the epilogue exit to maintain loop canonicalization guarantees SmallVector Preds(predecessors(Exit)); SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolling loop) B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit); InsertPt->eraseFromParent(); + if (DT) + DT->changeImmediateDominator(Exit, NewExit); + + // Split the main loop exit to maintain canonicalization guarantees. + SmallVector NewExitPreds{Latch}; + SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, + PreserveLCSSA); } /// Create a clone of the blocks in a loop and connect them together. @@ -284,7 +293,7 @@ BasicBlock *Preheader, std::vector &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, - LoopInfo *LI) { + DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); @@ -345,6 +354,20 @@ } } + if (DT) { + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { + BasicBlock *NewBB = cast(VMap[*BB]); + if (Header == *BB) { + // The header is dominated by the preheader. + DT->addNewBlock(NewBB, InsertTop); + } else { + // Copy information from original loop to unrolled loop. + BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); + DT->addNewBlock(NewBB, cast(VMap[IDomBB])); + } + } + } + // Change the incoming values to the ones defined in the preheader or // cloned loop. for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) { @@ -592,6 +615,12 @@ // Branch to either remainder (extra iterations) loop or unrolling loop. B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); PreHeaderBR->eraseFromParent(); + if (DT) { + if (UseEpilogRemainder) + DT->changeImmediateDominator(NewExit, PreHeader); + else + DT->changeImmediateDominator(PrologExit, PreHeader); + } Function *F = Header->getParent(); // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog/epilog code @@ -616,7 +645,7 @@ BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, - InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI); + InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); // Insert the cloned blocks into the function. F->getBasicBlockList().splice(InsertBot->getIterator(), Index: test/Transforms/LoopUnroll/2004-05-13-DontUnrollTooMuch.ll =================================================================== --- test/Transforms/LoopUnroll/2004-05-13-DontUnrollTooMuch.ll +++ test/Transforms/LoopUnroll/2004-05-13-DontUnrollTooMuch.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-unroll -disable-output +; RUN: opt < %s -loop-unroll -unroll-verify-domtree -disable-output define i32 @main() { entry: Index: test/Transforms/LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll =================================================================== --- test/Transforms/LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll +++ test/Transforms/LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-unroll -loop-simplify -disable-output +; RUN: opt < %s -loop-unroll -unroll-verify-domtree -loop-simplify -disable-output define void @print_board() { entry: Index: test/Transforms/LoopUnroll/2006-08-24-MultiBlockLoop.ll =================================================================== --- test/Transforms/LoopUnroll/2006-08-24-MultiBlockLoop.ll +++ test/Transforms/LoopUnroll/2006-08-24-MultiBlockLoop.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-unroll -S | grep bb72.2 +; RUN: opt < %s -loop-unroll -unroll-verify-domtree -S | grep bb72.2 define void @vorbis_encode_noisebias_setup() { entry: Index: test/Transforms/LoopUnroll/2007-04-16-PhiUpdate.ll =================================================================== --- test/Transforms/LoopUnroll/2007-04-16-PhiUpdate.ll +++ test/Transforms/LoopUnroll/2007-04-16-PhiUpdate.ll @@ -1,5 +1,5 @@ ; PR 1334 -; RUN: opt < %s -loop-unroll -disable-output +; RUN: opt < %s -loop-unroll -unroll-verify-domtree -disable-output define void @sal__math_float_manipulator_7__math__joint_array_dcv_ops__Omultiply__3([6 x float]* %agg.result) { entry: Index: test/Transforms/LoopUnroll/2007-05-05-UnrollMiscomp.ll =================================================================== --- test/Transforms/LoopUnroll/2007-05-05-UnrollMiscomp.ll +++ test/Transforms/LoopUnroll/2007-05-05-UnrollMiscomp.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-unroll -S | not grep undef +; RUN: opt < %s -loop-unroll -unroll-verify-domtree -S | not grep undef ; PR1385 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" Index: test/Transforms/LoopUnroll/2007-05-09-UnknownTripCount.ll =================================================================== --- test/Transforms/LoopUnroll/2007-05-09-UnknownTripCount.ll +++ test/Transforms/LoopUnroll/2007-05-09-UnknownTripCount.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-unroll -unroll-count=3 -S | grep bb72.2 +; RUN: opt < %s -loop-unroll -unroll-verify-domtree -unroll-count=3 -S | grep bb72.2 define void @foo(i32 %trips) { entry: Index: test/Transforms/LoopUnroll/2007-11-05-Crash.ll =================================================================== --- test/Transforms/LoopUnroll/2007-11-05-Crash.ll +++ test/Transforms/LoopUnroll/2007-11-05-Crash.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -disable-output -loop-unroll +; RUN: opt < %s -disable-output -loop-unroll -unroll-verify-domtree ; PR1770 ; PR1947 Index: test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll =================================================================== --- test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll +++ test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-unroll -S -unroll-count=4 | FileCheck %s +; RUN: opt < %s -loop-unroll -unroll-verify-domtree -S -unroll-count=4 | FileCheck %s ; Test phi update after partial unroll. declare i1 @check() nounwind Index: test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll =================================================================== --- test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll +++ test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll @@ -1,4 +1,4 @@ -; RUN: opt -S < %s -loop-unroll -unroll-count=4 | FileCheck %s +; RUN: opt -S < %s -loop-unroll -unroll-verify-domtree -unroll-count=4 | FileCheck %s ; ; Test induction variable simplify after loop unrolling. It should ; expose nice opportunities for GVN. Index: test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll =================================================================== --- test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll +++ test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll @@ -1,4 +1,4 @@ -; RUN: opt -S < %s -instcombine -inline -jump-threading -loop-unroll -unroll-count=4 | FileCheck %s +; RUN: opt -S < %s -instcombine -inline -jump-threading -loop-unroll -unroll-verify-domtree -unroll-count=4 | FileCheck %s ; ; This is a test case that required a number of setup passes because ; it depends on block order. Index: test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll =================================================================== --- test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll +++ test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-threshold=150 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-threshold=150 | FileCheck %s ; ; Verify that trunc i64 to i32 is considered free by loop unrolling ; heuristics when i32 is a native type. Index: test/Transforms/LoopUnroll/2012-04-09-unroll-indirectbr.ll =================================================================== --- test/Transforms/LoopUnroll/2012-04-09-unroll-indirectbr.ll +++ test/Transforms/LoopUnroll/2012-04-09-unroll-indirectbr.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -simplifycfg | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -simplifycfg | FileCheck %s ; PR12513: Loop unrolling breaks with indirect branches. ; If loop unrolling attempts to transform this loop, it replaces the ; indirectbr successors. SimplifyCFG then considers them to be unreachable. Index: test/Transforms/LoopUnroll/AArch64/full-unroll-trip-count-upper-bound.ll =================================================================== --- test/Transforms/LoopUnroll/AArch64/full-unroll-trip-count-upper-bound.ll +++ test/Transforms/LoopUnroll/AArch64/full-unroll-trip-count-upper-bound.ll @@ -1,5 +1,5 @@ -; RUN: opt -loop-unroll -S -mtriple aarch64 -mcpu=cortex-a57 %s | FileCheck %s -check-prefix=UNROLL -; RUN: opt -loop-unroll -unroll-max-upperbound=0 -S -mtriple aarch64 -mcpu=cortex-a57 %s | FileCheck %s -check-prefix=NOUNROLL +; RUN: opt -loop-unroll -unroll-verify-domtree -S -mtriple aarch64 -mcpu=cortex-a57 %s | FileCheck %s -check-prefix=UNROLL +; RUN: opt -loop-unroll -unroll-verify-domtree -unroll-max-upperbound=0 -S -mtriple aarch64 -mcpu=cortex-a57 %s | FileCheck %s -check-prefix=NOUNROLL ; This IR comes from this C code: ; Index: test/Transforms/LoopUnroll/AArch64/partial.ll =================================================================== --- test/Transforms/LoopUnroll/AArch64/partial.ll +++ test/Transforms/LoopUnroll/AArch64/partial.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s ; Partial unroll 8 times for this loop. define void @unroll1() nounwind { Index: test/Transforms/LoopUnroll/AArch64/runtime-loop.ll =================================================================== --- test/Transforms/LoopUnroll/AArch64/runtime-loop.ll +++ test/Transforms/LoopUnroll/AArch64/runtime-loop.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG -; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG ; Tests for unrolling loops with run-time trip counts Index: test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll =================================================================== --- test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll +++ test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -loop-unroll -S < %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -loop-unroll -unroll-verify-domtree -S < %s | FileCheck %s ; CHECK-LABEL: @test_unroll_convergent_barrier( ; CHECK: call void @llvm.amdgcn.s.barrier() Index: test/Transforms/LoopUnroll/PowerPC/a2-high-cost-trip-count-computation.ll =================================================================== --- test/Transforms/LoopUnroll/PowerPC/a2-high-cost-trip-count-computation.ll +++ test/Transforms/LoopUnroll/PowerPC/a2-high-cost-trip-count-computation.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s +; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll -unroll-verify-domtree | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" Index: test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll =================================================================== --- test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll +++ test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG -; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG +; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll -unroll-verify-domtree -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll -unroll-verify-domtree -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly { entry: Index: test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll =================================================================== --- test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll +++ test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -loop-unroll | FileCheck %s +; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -loop-unroll -unroll-verify-domtree | FileCheck %s define void @unroll_default() nounwind { entry: br label %loop Index: test/Transforms/LoopUnroll/X86/mmx.ll =================================================================== --- test/Transforms/LoopUnroll/X86/mmx.ll +++ test/Transforms/LoopUnroll/X86/mmx.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/Transforms/LoopUnroll/X86/partial.ll =================================================================== --- test/Transforms/LoopUnroll/X86/partial.ll +++ test/Transforms/LoopUnroll/X86/partial.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -S -loop-unroll -mcpu=nehalem | FileCheck %s -; RUN: opt < %s -S -loop-unroll -mcpu=core -unroll-runtime=0 | FileCheck -check-prefix=CHECK-NOUNRL %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -mcpu=nehalem | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -mcpu=core -unroll-runtime=0 | FileCheck -check-prefix=CHECK-NOUNRL %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/Transforms/LoopUnroll/basic.ll =================================================================== --- test/Transforms/LoopUnroll/basic.ll +++ test/Transforms/LoopUnroll/basic.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-unroll -S | FileCheck %s +; RUN: opt < %s -loop-unroll -unroll-verify-domtree -S | FileCheck %s ; This should not unroll since the address of the loop header is taken. Index: test/Transforms/LoopUnroll/convergent.ll =================================================================== --- test/Transforms/LoopUnroll/convergent.ll +++ test/Transforms/LoopUnroll/convergent.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-allow-partial -S | FileCheck %s +; RUN: opt < %s -loop-unroll -unroll-verify-domtree -unroll-runtime -unroll-allow-partial -S | FileCheck %s declare void @f() convergent Index: test/Transforms/LoopUnroll/ephemeral.ll =================================================================== --- test/Transforms/LoopUnroll/ephemeral.ll +++ test/Transforms/LoopUnroll/ephemeral.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-threshold=50 | FileCheck %s ; Make sure this loop is completely unrolled... ; CHECK-LABEL: @test1 Index: test/Transforms/LoopUnroll/full-unroll-bad-cost.ll =================================================================== --- test/Transforms/LoopUnroll/full-unroll-bad-cost.ll +++ test/Transforms/LoopUnroll/full-unroll-bad-cost.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-unroll < %s | FileCheck %s +; RUN: opt -S -loop-unroll -unroll-verify-domtree < %s | FileCheck %s ; LLVM should not try to fully unroll this loop. Index: test/Transforms/LoopUnroll/full-unroll-crashers.ll =================================================================== --- test/Transforms/LoopUnroll/full-unroll-crashers.ll +++ test/Transforms/LoopUnroll/full-unroll-crashers.ll @@ -1,5 +1,5 @@ ; Check that we don't crash on corner cases. -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=1 -unroll-percent-dynamic-cost-saved-threshold=20 -o /dev/null +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=1 -unroll-percent-dynamic-cost-saved-threshold=20 -o /dev/null target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16 Index: test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll =================================================================== --- test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll +++ test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=70 -unroll-dynamic-cost-savings-discount=90 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=70 -unroll-dynamic-cost-savings-discount=90 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @unknown_global = internal unnamed_addr global [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16 Index: test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll =================================================================== --- test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll +++ test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=40 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=40 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16 Index: test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll =================================================================== --- test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll +++ test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=60 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=60 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @known_constant = internal unnamed_addr constant [10 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0], align 16 Index: test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll =================================================================== --- test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll +++ test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=60 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=60 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; When examining gep-instructions we shouldn't consider them simplified if the Index: test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll =================================================================== --- test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll +++ test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" define i64 @propagate_loop_phis() { Index: test/Transforms/LoopUnroll/full-unroll-heuristics.ll =================================================================== --- test/Transforms/LoopUnroll/full-unroll-heuristics.ll +++ test/Transforms/LoopUnroll/full-unroll-heuristics.ll @@ -17,10 +17,10 @@ ; optimizations to remove ~55% of the instructions, the loop body size is 9, ; and unrolled size is 65. -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=20 -unroll-dynamic-cost-savings-discount=0 | FileCheck %s -check-prefix=TEST1 -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=20 -unroll-dynamic-cost-savings-discount=90 | FileCheck %s -check-prefix=TEST2 -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=80 -unroll-dynamic-cost-savings-discount=90 | FileCheck %s -check-prefix=TEST3 -; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=100 -unroll-percent-dynamic-cost-saved-threshold=80 -unroll-dynamic-cost-savings-discount=0 | FileCheck %s -check-prefix=TEST4 +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=20 -unroll-dynamic-cost-savings-discount=0 | FileCheck %s -check-prefix=TEST1 +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=20 -unroll-dynamic-cost-savings-discount=90 | FileCheck %s -check-prefix=TEST2 +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=80 -unroll-dynamic-cost-savings-discount=90 | FileCheck %s -check-prefix=TEST3 +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=100 -unroll-percent-dynamic-cost-saved-threshold=80 -unroll-dynamic-cost-savings-discount=0 | FileCheck %s -check-prefix=TEST4 ; If the absolute threshold is too low, or if we can't optimize away requested ; percent of instructions, we shouldn't unroll: @@ -34,7 +34,7 @@ ; TEST4-NOT: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv ; And check that we don't crash when we're not allowed to do any analysis. -; RUN: opt < %s -loop-unroll -unroll-max-iteration-count-to-analyze=0 -disable-output +; RUN: opt < %s -loop-unroll -unroll-verify-domtree -unroll-max-iteration-count-to-analyze=0 -disable-output target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16 Index: test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll =================================================================== --- test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll +++ test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-unroll < %s | FileCheck %s +; RUN: opt -S -loop-unroll -unroll-verify-domtree < %s | FileCheck %s ; Unroll twice, with first loop exit kept ; CHECK-LABEL: @s32_max1 Index: test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll =================================================================== --- test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll +++ test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -unroll-runtime -loop-unroll < %s | FileCheck %s +; RUN: opt -S -unroll-runtime -loop-unroll -unroll-verify-domtree < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" Index: test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll =================================================================== --- test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll +++ test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt < %s -disable-output -stats -loop-unroll -info-output-file - | FileCheck %s --check-prefix=STATS +; RUN: opt < %s -disable-output -stats -loop-unroll -unroll-verify-domtree -info-output-file - | FileCheck %s --check-prefix=STATS ; STATS: 1 loop-unroll - Number of loops unrolled (completely or otherwise) ; Test that llvm.annotation intrinsic do not count against the loop body size ; and prevent unrolling. Index: test/Transforms/LoopUnroll/loop-remarks-with-hotness.ll =================================================================== --- test/Transforms/LoopUnroll/loop-remarks-with-hotness.ll +++ test/Transforms/LoopUnroll/loop-remarks-with-hotness.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -S -loop-unroll -pass-remarks=loop-unroll -pass-remarks-with-hotness -unroll-count=16 2>&1 | FileCheck -check-prefix=COMPLETE-UNROLL %s -; RUN: opt < %s -S -loop-unroll -pass-remarks=loop-unroll -pass-remarks-with-hotness -unroll-count=4 2>&1 | FileCheck -check-prefix=PARTIAL-UNROLL %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -pass-remarks=loop-unroll -pass-remarks-with-hotness -unroll-count=16 2>&1 | FileCheck -check-prefix=COMPLETE-UNROLL %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -pass-remarks=loop-unroll -pass-remarks-with-hotness -unroll-count=4 2>&1 | FileCheck -check-prefix=PARTIAL-UNROLL %s ; COMPLETE-UNROLL: remark: {{.*}}: completely unrolled loop with 16 iterations (hotness: 300) ; PARTIAL-UNROLL: remark: {{.*}}: unrolled loop by a factor of 4 {{.*}} (hotness: 300) Index: test/Transforms/LoopUnroll/loop-remarks.ll =================================================================== --- test/Transforms/LoopUnroll/loop-remarks.ll +++ test/Transforms/LoopUnroll/loop-remarks.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -S -loop-unroll -pass-remarks=loop-unroll -unroll-count=16 2>&1 | FileCheck -check-prefix=COMPLETE-UNROLL %s -; RUN: opt < %s -S -loop-unroll -pass-remarks=loop-unroll -unroll-count=4 2>&1 | FileCheck -check-prefix=PARTIAL-UNROLL %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -pass-remarks=loop-unroll -unroll-count=16 2>&1 | FileCheck -check-prefix=COMPLETE-UNROLL %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -pass-remarks=loop-unroll -unroll-count=4 2>&1 | FileCheck -check-prefix=PARTIAL-UNROLL %s ; COMPLETE-UNROLL: remark: {{.*}}: completely unrolled loop with 16 iterations ; PARTIAL-UNROLL: remark: {{.*}}: unrolled loop by a factor of 4 Index: test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll =================================================================== --- test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll +++ test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -unroll-threshold=20 -loop-unroll -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-dynamic-cost-savings-discount=0 | FileCheck %s +; RUN: opt < %s -S -unroll-threshold=20 -loop-unroll -unroll-verify-domtree -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-dynamic-cost-savings-discount=0 | FileCheck %s ; The Loop TripCount is 9. However unroll factors 3 or 9 exceed given threshold. ; The test checks that we choose a smaller, power-of-two, unroll count and do not give up on unrolling. Index: test/Transforms/LoopUnroll/peel-loop-pgo.ll =================================================================== --- test/Transforms/LoopUnroll/peel-loop-pgo.ll +++ test/Transforms/LoopUnroll/peel-loop-pgo.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll -unroll-allow-peeling 2>&1 | FileCheck %s +; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll -unroll-verify-domtree -unroll-allow-peeling 2>&1 | FileCheck %s ; REQUIRES: asserts ; Make sure we use the profile information correctly to peel-off 3 iterations Index: test/Transforms/LoopUnroll/peel-loop.ll =================================================================== --- test/Transforms/LoopUnroll/peel-loop.ll +++ test/Transforms/LoopUnroll/peel-loop.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-force-peel-count=3 -simplifycfg -instcombine | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-force-peel-count=3 -simplifycfg -instcombine | FileCheck %s ; Basic loop peeling - check that we can peel-off the first 3 loop iterations ; when explicitly requested. Index: test/Transforms/LoopUnroll/pr10813.ll =================================================================== --- test/Transforms/LoopUnroll/pr10813.ll +++ test/Transforms/LoopUnroll/pr10813.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-unroll -disable-output +; RUN: opt < %s -loop-unroll -unroll-verify-domtree -disable-output define void @"f_fu___REFUf[]REFUf[]Uf"() nounwind { allocas: Index: test/Transforms/LoopUnroll/pr11361.ll =================================================================== --- test/Transforms/LoopUnroll/pr11361.ll +++ test/Transforms/LoopUnroll/pr11361.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-unroll -disable-output < %s +; RUN: opt -loop-unroll -unroll-verify-domtree -disable-output < %s ; PR11361 ; This tests for an iterator invalidation issue. Index: test/Transforms/LoopUnroll/pr14167.ll =================================================================== --- test/Transforms/LoopUnroll/pr14167.ll +++ test/Transforms/LoopUnroll/pr14167.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-runtime | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-runtime | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-bgq-linux" Index: test/Transforms/LoopUnroll/pr18861.ll =================================================================== --- test/Transforms/LoopUnroll/pr18861.ll +++ test/Transforms/LoopUnroll/pr18861.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-unroll -indvars -disable-output +; RUN: opt < %s -loop-unroll -unroll-verify-domtree -indvars -disable-output @b = external global i32, align 4 Index: test/Transforms/LoopUnroll/pr27157.ll =================================================================== --- test/Transforms/LoopUnroll/pr27157.ll +++ test/Transforms/LoopUnroll/pr27157.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-unroll -debug-only=loop-unroll -disable-output < %s +; RUN: opt -loop-unroll -unroll-verify-domtree -debug-only=loop-unroll -disable-output < %s ; REQUIRES: asserts ; Compile this test with debug flag on to verify domtree right after loop unrolling. target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" Index: test/Transforms/LoopUnroll/pr28132.ll =================================================================== --- test/Transforms/LoopUnroll/pr28132.ll +++ test/Transforms/LoopUnroll/pr28132.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-unroll -S < %s | FileCheck %s +; RUN: opt -loop-unroll -unroll-verify-domtree -S < %s | FileCheck %s target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" target triple = "i686-pc-windows-msvc" Index: test/Transforms/LoopUnroll/rebuild_lcssa.ll =================================================================== --- test/Transforms/LoopUnroll/rebuild_lcssa.ll +++ test/Transforms/LoopUnroll/rebuild_lcssa.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-unroll -S | FileCheck %s +; RUN: opt < %s -loop-unroll -unroll-verify-domtree -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; This test shows how unrolling an inner loop could break LCSSA for an outer Index: test/Transforms/LoopUnroll/runtime-loop.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop.ll +++ test/Transforms/LoopUnroll/runtime-loop.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG -; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" Index: test/Transforms/LoopUnroll/runtime-loop1.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop1.ll +++ test/Transforms/LoopUnroll/runtime-loop1.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG -; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG ; This tests that setting the unroll count works Index: test/Transforms/LoopUnroll/runtime-loop2.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop2.ll +++ test/Transforms/LoopUnroll/runtime-loop2.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-runtime-epilog=true -unroll-count=8 | FileCheck %s -check-prefix=EPILOG -; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-threshold=25 -unroll-runtime -unroll-runtime-epilog=true -unroll-count=8 | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG ; Choose a smaller, power-of-two, unroll count if the loop is too large. ; This test makes sure we're not unrolling 'odd' counts Index: test/Transforms/LoopUnroll/runtime-loop3.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop3.ll +++ test/Transforms/LoopUnroll/runtime-loop3.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt < %s -disable-output -stats -loop-unroll -unroll-runtime -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS +; RUN: opt < %s -disable-output -stats -loop-unroll -unroll-verify-domtree -unroll-runtime -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS ; Test that nested loops can be unrolled. We need to increase threshold to do it Index: test/Transforms/LoopUnroll/runtime-loop5.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop5.ll +++ test/Transforms/LoopUnroll/runtime-loop5.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s -; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s ; Given that the trip-count of this loop is a 3-bit value, we cannot ; safely unroll it with a count of anything more than 8. Index: test/Transforms/LoopUnroll/scevunroll.ll =================================================================== --- test/Transforms/LoopUnroll/scevunroll.ll +++ test/Transforms/LoopUnroll/scevunroll.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -indvars -loop-unroll -verify-loop-info | FileCheck %s +; RUN: opt < %s -S -indvars -loop-unroll -unroll-verify-domtree -verify-loop-info | FileCheck %s ; ; Unit tests for loop unrolling using ScalarEvolution to compute trip counts. ; Index: test/Transforms/LoopUnroll/shifted-tripcount.ll =================================================================== --- test/Transforms/LoopUnroll/shifted-tripcount.ll +++ test/Transforms/LoopUnroll/shifted-tripcount.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-unroll -unroll-count=2 -S | FileCheck %s +; RUN: opt < %s -loop-unroll -unroll-verify-domtree -unroll-count=2 -S | FileCheck %s ; LoopUnroll should unroll this loop into one big basic block. Index: test/Transforms/LoopUnroll/tripcount-overflow.ll =================================================================== --- test/Transforms/LoopUnroll/tripcount-overflow.ll +++ test/Transforms/LoopUnroll/tripcount-overflow.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG -; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG +; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll -unroll-verify-domtree -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll -unroll-verify-domtree -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; This test case documents how runtime loop unrolling handles the case Index: test/Transforms/LoopUnroll/unloop.ll =================================================================== --- test/Transforms/LoopUnroll/unloop.ll +++ test/Transforms/LoopUnroll/unloop.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -verify-loop-info | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -verify-loop-info | FileCheck %s ; RUN: opt < %s -S -passes='function(require,require,require,loop(unroll),verify)' | FileCheck %s ; ; Unit tests for LoopInfo::markAsRemoved. Index: test/Transforms/LoopUnroll/unroll-cleanuppad.ll =================================================================== --- test/Transforms/LoopUnroll/unroll-cleanuppad.ll +++ test/Transforms/LoopUnroll/unroll-cleanuppad.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-unroll %s | FileCheck %s +; RUN: opt -S -loop-unroll -unroll-verify-domtree %s | FileCheck %s target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc18.0.0" Index: test/Transforms/LoopUnroll/unroll-count.ll =================================================================== --- test/Transforms/LoopUnroll/unroll-count.ll +++ test/Transforms/LoopUnroll/unroll-count.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-count=2 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-count=2 | FileCheck %s ; Checks that "llvm.loop.unroll.disable" is set when ; unroll with count set by user has been applied. ; Index: test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll =================================================================== --- test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll +++ test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-threshold=40 -unroll-dynamic-cost-savings-discount=0 | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-runtime -unroll-threshold=40 -unroll-dynamic-cost-savings-discount=0 | FileCheck %s @known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16 Index: test/Transforms/LoopUnroll/unroll-opt-attribute.ll =================================================================== --- test/Transforms/LoopUnroll/unroll-opt-attribute.ll +++ test/Transforms/LoopUnroll/unroll-opt-attribute.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -S -loop-unroll -unroll-count=4 | FileCheck -check-prefix=CHECK_COUNT4 %s -; RUN: opt < %s -S -loop-unroll | FileCheck -check-prefix=CHECK_NOCOUNT %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree -unroll-count=4 | FileCheck -check-prefix=CHECK_COUNT4 %s +; RUN: opt < %s -S -loop-unroll -unroll-verify-domtree | FileCheck -check-prefix=CHECK_NOCOUNT %s ;///////////////////// TEST 1 ////////////////////////////// Index: test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll =================================================================== --- test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll +++ test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-unroll -S | FileCheck %s +; RUN: opt < %s -loop-unroll -unroll-verify-domtree -S | FileCheck %s ; ; Verify that the unrolling pass removes existing unroll count metadata ; and adds a disable unrolling node after unrolling is complete. Index: test/Transforms/LoopUnroll/unroll-pragmas.ll =================================================================== --- test/Transforms/LoopUnroll/unroll-pragmas.ll +++ test/Transforms/LoopUnroll/unroll-pragmas.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s -; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s +; RUN: opt < %s -loop-unroll -unroll-verify-domtree -pragma-unroll-threshold=1024 -S | FileCheck %s +; RUN: opt < %s -loop-unroll -unroll-verify-domtree -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s ; ; Run loop unrolling twice to verify that loop unrolling metadata is properly ; removed and further unrolling is disabled after the pass is run once. Index: test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll =================================================================== --- test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll +++ test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll @@ -1,4 +1,4 @@ -; RUN: opt -S < %s -loop-unroll -block-freq | FileCheck %s +; RUN: opt -S < %s -loop-unroll -unroll-verify-domtree -block-freq | FileCheck %s ; Crasher from PR20987. ; CHECK: define void @update_loop_info_in_subloops