Index: llvm/lib/Transforms/Scalar/LoopInterchange.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1022,6 +1022,39 @@
   return true;
 }
 
+// In case of multi-level nested loops, it may occur that lcssa phis exist in
+// the latch of InnerLoop, i.e., when defs of the incoming values are further
+// inside the loopnest. Sometimes those incoming values are not available
+// after interchange, since the original inner latch will become the new outer
+// latch which may have predecessor paths that do not include those incoming
+// values.
+// Returns false only when such a phi has a user inside the InnerLoop latch.
+// TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
+// multi-level loop nests.
+static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
+  // If the original outer latch has only one predecessor, then values defined
+  // further inside the loopnest, e.g., in the innermost loop, will be available
+  // at the new outer latch after interchange.
+  if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
+    return true;
+
+  // The outer latch has more than one predecessor, i.e., the inner
+  // exit and the inner header.
+  // PHI nodes in the inner latch are lcssa phis where the incoming values
+  // are defined further inside the loopnest. Check if those phis are used
+  // in the original inner latch. If that is the case then bail out since
+  // those incoming values may not be available at the new outer latch.
+  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
+  for (PHINode &PHI : InnerLoopLatch->phis()) {
+    for (auto *U : PHI.users()) {
+      Instruction *UI = cast<Instruction>(U);
+      if (InnerLoopLatch == UI->getParent())
+        return false;
+    }
+  }
+  return true;
+}
+
 bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
                                                   unsigned OuterLoopId,
                                                   CharMatrix &DepMatrix) {
@@ -1057,6 +1090,18 @@
     return false;
   }
 
+  if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
+    LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n");
+    ORE->emit([&]() {
+      return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI",
+                                      InnerLoop->getStartLoc(),
+                                      InnerLoop->getHeader())
+             << "Cannot interchange loops because unsupported PHI nodes found "
+                "in inner loop latch.";
+    });
+    return false;
+  }
+
   // TODO: The loops could not be interchanged due to current limitations in the
   // transform module.
   if (currentLimitations()) {
Index: llvm/test/Transforms/LoopInterchange/innermost-latch-uses-values-in-middle-header.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopInterchange/innermost-latch-uses-values-in-middle-header.ll
@@ -0,0 +1,57 @@
+; REQUIRES: asserts
+; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
+; RUN:     -S -debug 2>&1 | FileCheck %s
+
+@a = common global i32 0, align 4
+@d = common dso_local local_unnamed_addr global [1 x [6 x i32]] zeroinitializer, align 4
+
+;; After interchanging the inner and the middle loop, we should not continue
+;; doing interchange for the (new) middle loop and the outer loop, because of
+;; values defined in the new inner loop not available in the exiting block of
+;; the entire loop nest.
+; CHECK: Found unsupported PHI nodes in inner loop latch.
+; CHECK: Not interchanging loops. Cannot prove legality.
+;
+define void @lcssa_phis_in_inner_latch() {
+entry:
+  %0 = load i32, i32* @a, align 4
+  %b = add i32 80, 1
+  br label %outer.header
+
+outer.header:                                     ; preds = %outer.latch, %entry
+  %indvar.outer = phi i32 [ 10, %entry ], [ %indvar.outer.next, %outer.latch ]
+  %tobool71.i = icmp eq i32 %0, 0
+  br i1 %tobool71.i, label %inner.header.preheader, label %outer.latch
+
+inner.header.preheader:                           ; preds = %outer.header
+  br label %inner.header
+
+inner.header:                                     ; preds = %inner.header.preheader, %inner.latch.split
+  %indvar.inner = phi i64 [ %1, %inner.latch.split ], [ 4, %inner.header.preheader ]
+  %indvar.middle.wide = zext i32 %b to i64 ; a def in the middle header
+  %arrayidx9.i = getelementptr inbounds [1 x [6 x i32]], [1 x [6 x i32]]* @d, i64 0, i64 %indvar.inner, i64 %indvar.inner
+  store i32 0, i32* %arrayidx9.i, align 4
+  br label %inner.latch
+
+inner.latch:                                      ; preds = %inner.header
+  %indvar.inner.next = add nsw i64 %indvar.inner, 1
+  %tobool5.i = icmp eq i64 %indvar.inner.next, %indvar.middle.wide
+  br label %inner.latch.split
+
+inner.latch.split:                                ; preds = %inner.latch
+  %indvar.middle.wide.lcssa = phi i64 [ %indvar.middle.wide, %inner.latch ] ; phi consumed by the icmp in this same block
+  %1 = add nsw i64 %indvar.inner, 1
+  %2 = icmp eq i64 %1, %indvar.middle.wide.lcssa
+  br i1 %2, label %outer.latch.loopexit, label %inner.header
+
+outer.latch.loopexit:                             ; preds = %inner.latch.split
+  br label %outer.latch
+
+outer.latch:                                      ; preds = %outer.latch.loopexit, %outer.header
+  %indvar.outer.next = add nsw i32 %indvar.outer, -5
+  %tobool.i = icmp eq i32 %indvar.outer.next, 0
+  br i1 %tobool.i, label %outer.exit, label %outer.header
+
+outer.exit:                                       ; preds = %outer.latch
+  ret void
+}