Index: lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- lib/Transforms/Utils/LoopUtils.cpp +++ lib/Transforms/Utils/LoopUtils.cpp @@ -36,6 +36,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/IR/InstrTypes.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -74,9 +75,37 @@ if (IsDedicatedExit) return false; + // With nested loops, the inner loop might exit to the header of an + // enclosing loop, in which case the in-loop predecessor is a latch for + // that enclosing loop. If we insert a block between the latch and the + // header, that block becomes the new latch, so any loop metadata on the + // old latch needs to be moved to the new one. + MDNode *OuterLoopMD = nullptr; + if (LI->isLoopHeader(BB)) { + OuterLoopMD = LI->getLoopFor(BB)->getLoopID(); + // getLoopID() also verifies that all latches have the same metadata + } + auto *NewExitBB = SplitBlockPredecessors( BB, InLoopPredecessors, ".loopexit", DT, LI, nullptr, PreserveLCSSA); + // If OuterLoopMD is non-null, we know that BB is the header of a + // different loop, and that loop has metadata. Check that the new block + // is enclosed by that other loop, which means that it's a latch for it. + if (NewExitBB && OuterLoopMD && + LI->getLoopFor(NewExitBB) == LI->getLoopFor(BB)) { + // Every pred of the new block should be a latch for the outer loop, + // with the same metadata.
+ for (auto *PredLoopBB : InLoopPredecessors) { + Instruction *TI = PredLoopBB->getTerminator(); + assert(TI->getMetadata(LLVMContext::MD_loop) == OuterLoopMD && + "exit edge to other loop doesn't contain expected metadata"); + TI->setMetadata(LLVMContext::MD_loop, nullptr); + } + NewExitBB->getTerminator()->setMetadata(LLVMContext::MD_loop, + OuterLoopMD); + } + if (!NewExitBB) LLVM_DEBUG( dbgs() << "WARNING: Can't create a dedicated exit block for loop: " Index: test/Transforms/LoopSimplify/preserve-llvm-loop-metadata2.ll =================================================================== --- /dev/null +++ test/Transforms/LoopSimplify/preserve-llvm-loop-metadata2.ll @@ -0,0 +1,65 @@ +; RUN: opt -S -loop-simplify < %s | FileCheck %s + +; CHECK: for.cond.loopexit: +; CHECK-NEXT: llvm.loop [[LOOPMD:.*]] +; CHECK-NOT: br i1 {{.*}}, label {{.*}}, label %for.cond.loopexit, !llvm.loop [[LOOPMD]] +; CHECK: br label %for.cond1, !llvm.loop {{.*}} + +; Two-loop nest with llvm.loop metadata on each loop. +; After loop-simplify, a new block is created between for.cond1 (inner loop's +; exiting block) and for.cond (header of outer loop). +; for.cond1 carries the llvm.loop metadata for the outer loop, which must +; be moved to the new block, as the new block is now the outer loop latch.
+ +define dso_local void @loopnest(double* noalias nocapture readonly %a, double* noalias nocapture %b) local_unnamed_addr #0 { +entry: + br label %for.cond + +for.cond: ; preds = %for.cond1, %entry + %ii.0 = phi i64 [ 2, %entry ], [ %add, %for.cond1 ] + %cmp = icmp ult i64 %ii.0, 262142 + br i1 %cmp, label %for.cond1, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + ret void + +for.cond1: ; preds = %for.cond, %for.body4 + %j.0 = phi i64 [ %add10, %for.body4 ], [ %ii.0, %for.cond ] + %add = add nuw nsw i64 %ii.0, 16 + %cmp2 = icmp ult i64 %j.0, %add + br i1 %cmp2, label %for.body4, label %for.cond, !llvm.loop !2 + +for.body4: ; preds = %for.cond1 + %sub = add nsw i64 %j.0, -2 + %arrayidx = getelementptr inbounds double, double* %a, i64 %sub + %0 = load double, double* %arrayidx, align 8, !tbaa !4 + %arrayidx5 = getelementptr inbounds double, double* %a, i64 %j.0 + %1 = load double, double* %arrayidx5, align 8, !tbaa !4 + %add6 = fadd double %0, %1 + %sub7 = add nsw i64 %j.0, -1 + %arrayidx8 = getelementptr inbounds double, double* %a, i64 %sub7 + %2 = load double, double* %arrayidx8, align 8, !tbaa !4 + %add9 = fadd double %add6, %2 + %add10 = add nuw nsw i64 %j.0, 1 + %arrayidx11 = getelementptr inbounds double, double* %a, i64 %add10 + %3 = load double, double* %arrayidx11, align 8, !tbaa !4 + %add12 = fadd double %add9, %3 + %add13 = add nuw nsw i64 %j.0, 2 + %arrayidx14 = getelementptr inbounds double, double* %a, i64 %add13 + %4 = load double, double* %arrayidx14, align 8, !tbaa !4 + %add15 = fadd double %add12, %4 + %arrayidx16 = getelementptr inbounds double, double* %b, i64 %j.0 + store double %add15, double* %arrayidx16, align 8, !tbaa !4 + br label %for.cond1, !llvm.loop !8 +} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 8.0.0"} +!2 = distinct !{!2, !3} +!3 = !{!"llvm.loop.unroll_and_jam.count", i32 17} +!4 = !{!5, !5, i64 0} +!5 = !{!"double", !6, i64 0} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C/C++ 
TBAA"} +!8 = distinct !{!8, !9} +!9 = !{!"llvm.loop.unroll.count", i32 1}