Index: llvm/include/llvm/Transforms/Utils/Local.h =================================================================== --- llvm/include/llvm/Transforms/Utils/Local.h +++ llvm/include/llvm/Transforms/Utils/Local.h @@ -156,6 +156,9 @@ /// other than PHI nodes, potential debug intrinsics and the branch. If /// possible, eliminate BB by rewriting all the predecessors to branch to the /// successor block and return true. If we can't transform, return false. +// +// FIXME: Currently the conditions diverge when this function is called by +// 'JumpThreading' and 'SimplifyCFG'. The divergence should really be unified. bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU = nullptr); Index: llvm/lib/Transforms/Utils/Local.cpp =================================================================== --- llvm/lib/Transforms/Utils/Local.cpp +++ llvm/lib/Transforms/Utils/Local.cpp @@ -1090,6 +1090,108 @@ } } + // 'BB' and 'BB->Pred' are loop latches, bail out. + // + // In this case, we expect 'BB' is the latch for outer-loop and 'BB->Pred' is + // the latch for inner-loop (see reason below), so bail out to prerserve + // inner-loop metadata rather than eliminating 'BB' and attaching its metadata + // to this inner-loop. + // - The reason we believe 'BB' and 'BB->Pred' have different inner-most + // loops: assuming 'BB' and 'BB->Pred' are from the same inner-most loop L, + // then 'BB' is the header and latch of 'L' and thereby 'L' must consist of + // one self-looping basic block, which is contradictory with the assumption. + // + // To illustrate how inner-loop metadata is dropped: + // + // CFG Before + // + // BB is while.cond.exit, attached with loop metdata md2. + // BB->Pred is for.body, attached with loop metadata md1. + // + // entry + // | + // v + // ---> while.cond -------------> while.end + // | | + // | v + // | while.body + // | | + // | v + // | for.body <---- (md1) + // | | |______| + // | v + // | while.cond.exit (md2) + // | | + // |_______| + // + // CFG After + // + // while.cond1 is the merge of while.cond.exit and while.cond above. + // for.body is attached with md2, and md1 is dropped. + // If LoopSimplify runs later (as a part of loop pass), it could create + // dedicated exits for inner-loop (essentially adding `while.cond.exit` basck), + // but won't it won't see 'md1' nor restore it for the inner-loop. + // + // entry + // | + // v + // ---> while.cond1 -------------> while.end + // | | + // | v + // | while.body + // | | + // | v + // | for.body <---- (md2) + // |_______| |______| + // + // Alternative option 1, concatenate outer-loop metadata and inner-loop + // metadata and attach it to inner-loop metadata and proceed with deleting BB. + // This doesn't work since multiple transformations in the same loop metadata + // are not well defined, and the results could depend on loop pass ordering. + // Even if concatenation is correct, deleted 'BB' could be added back by + // LoopSimplify pass -> use CFG after as an example, after 'BB' is deleted, + // 'while.cond1' is an exit block with predecessors (entry) that are not + // dominated by inner-loop header (for.body), so LoopSimplify will add a + // dedicated exit for the inner-loop. By then it's ambigious how to undo the + // concatenation of loop metadata. + // + // Alternative option 2, partially fold 'BB' into 'BB->Succ' -> this may not + // simplify CFG. + // + // To illustrate: + // + // CFG Before + // + // BB.0 BB.1 BB.2 (md1) AnotherPred + // | / | | + // v v | | + // BB.PHI | | + // BB.BR (md2) <---- | + // | | + // v | + // Succ <------------------------- + // + // CFG After + // + // (md2) (md2) + // BB.0 BB.1 BB.2 (md1) AnotherPred + // | / | | + // | / v | + // | / BB.PHI | + // | / BB.BR(md2) | + // | / / | + // v v v | + // BB.PHI.Rewrite | + // Succ <---------------------- + if (Instruction *TI = BB->getTerminator()) + if (MDNode *LoopMD = TI->getMetadata(LLVMContext::MD_loop)) { + for (BasicBlock *Pred : predecessors(BB)) { + if (Instruction *PredTI = Pred->getTerminator()) + if (MDNode *PredLoopMD = PredTI->getMetadata(LLVMContext::MD_loop)) + return false; + } + } + LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); SmallVector Updates; Index: llvm/test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll =================================================================== --- llvm/test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll +++ llvm/test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -keep-loops=false -S < %s | FileCheck %s ; RUN: opt -passes='simplifycfg' -S < %s | FileCheck %s @@ -27,8 +28,6 @@ store i32 %add, ptr %count, align 4 br label %if.end -; CHECK: if.then: -; CHECK: br label %while.cond, !llvm.loop !0 if.else: ; preds = %while.body %4 = load i32, ptr %count, align 4 @@ -36,8 +35,6 @@ store i32 %add2, ptr %count, align 4 br label %if.end -; CHECK: if.else: -; CHECK: br label %while.cond, !llvm.loop !0 if.end: ; preds = %if.else, %if.then br label %while.cond, !llvm.loop !0 @@ -46,10 +43,10 @@ ret void } -; The test case is constructed based on the following C++ code, -; as a simplified test case to show why `llvm.loop.unroll.enable` -; could be dropped. +; Test that empty loop latch `while.cond.loopexit` will not be folded into its successor if its +; predecessor blocks are also loop latches. ; +; The test case is constructed based on the following C++ code. ; While the C++ code itself might have the inner-loop unrolled (e.g., with -O3), ; the loss of inner-loop unroll metadata is a bug. ; Under some optimization pipelines (e.g., FullLoopUnroll pass is skipped in ThinLTO prelink stage), @@ -111,4 +108,7 @@ !5 = !{!"llvm.loop.unroll.enable"} ; CHECK: !0 = distinct !{!0, !1} ; CHECK: !1 = !{!"llvm.loop.distribute.enable", i1 true} -; CHECK-NOT: !{!"llvm.loop.unroll.enable"} +; CHECK: !2 = distinct !{!2, !3} +; CHECK: !3 = !{!"llvm.loop.mustprogress"} +; CHECK: !4 = distinct !{!4, !3, !5} +; CHECK: !5 = !{!"llvm.loop.unroll.enable"}