Index: llvm/lib/Transforms/Scalar/LoopInterchange.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1022,6 +1022,39 @@
   return true;
 }
 
+// When the inner loop is a nested loop itself, if there is a def inside the
+// inner loop, i.e., between the inner header and the inner latch, and there
+// is a use in the original inner latch, there will be an lcssa phi node in
+// the original inner latch, and we should check whether it is supported.
+// Note that the original inner latch will become the new outer latch after
+// interchange. Returns false if the incoming values of those lcssa phis may
+// not be available in the new outer latch after interchange; true otherwise.
+static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
+  if (InnerLoop->getSubLoops().empty())
+    return true;
+  // If the original outer latch has only one predecessor, then values defined
+  // inside the inner loop, i.e., in the innermost loop, will be available at
+  // the new outer latch after interchange.
+  if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
+    return true;
+
+  // The outer latch has more than one predecessor, i.e., the inner
+  // exit and the inner header.
+  // PHI nodes in the inner latch are lcssa phis where the incoming values
+  // are defined in the innermost loop. Check if those phis are used in the
+  // original inner latch. If that is the case then bail out since those
+  // incoming values may not be available at the new outer latch.
+  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
+  for (PHINode &PHI : InnerLoopLatch->phis()) {
+    for (auto *U : PHI.users()) {
+      Instruction *UI = cast<Instruction>(U);
+      if (InnerLoopLatch == UI->getParent())
+        return false;
+    }
+  }
+  return true;
+}
+
 bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
                                                   unsigned OuterLoopId,
                                                   CharMatrix &DepMatrix) {
@@ -1057,6 +1090,18 @@
     return false;
   }
 
+  if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
+    LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n");
+    ORE->emit([&]() {
+      return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI",
+                                      InnerLoop->getStartLoc(),
+                                      InnerLoop->getHeader())
+             << "Cannot interchange loops because unsupported PHI nodes found "
+                "in inner loop latch.";
+    });
+    return false;
+  }
+
   // TODO: The loops could not be interchanged due to current limitations in the
   // transform module.
   if (currentLimitations()) {
Index: llvm/test/Transforms/LoopInterchange/innermost-latch-uses-values-in-middle-header.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopInterchange/innermost-latch-uses-values-in-middle-header.ll
@@ -0,0 +1,58 @@
+; REQUIRES: asserts
+; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
+; RUN:     -S -debug 2>&1 | FileCheck %s
+
+@a = common global i32 0, align 4
+@d = common dso_local local_unnamed_addr global [1 x [6 x i32]] zeroinitializer, align 4
+
+;; After interchanging the innermost and the middle loop, we should not continue
+;; doing interchange for the (new) middle loop and the outermost loop, because of
+;; values defined in the new innermost loop not available in the exiting block of
+;; the entire loop nest.
+; CHECK: Loops are legal to interchange
+; CHECK: Loops interchanged.
+; CHECK: Found unsupported PHI nodes in inner loop latch.
+; CHECK: Not interchanging loops. Cannot prove legality.
+define void @innermost_latch_uses_values_in_middle_header() {
+entry:
+  %0 = load i32, i32* @a, align 4
+  %b = add i32 80, 1
+  br label %outermost.header
+
+outermost.header:                                 ; preds = %outermost.latch, %entry
+  %indvar.outermost = phi i32 [ 10, %entry ], [ %indvar.outermost.next, %outermost.latch ]
+  %tobool71.i = icmp eq i32 %0, 0
+  br i1 %tobool71.i, label %middle.header, label %outermost.latch
+
+middle.header:                                    ; preds = %middle.latch, %outermost.header
+  %indvar.middle = phi i64 [ 4, %outermost.header ], [ %indvar.middle.next, %middle.latch ]
+  %indvar.middle.wide = zext i32 %b to i64 ; def in the middle header, outside the innermost loop
+  br label %innermost.header
+
+innermost.header:                                 ; preds = %middle.header, %innermost.latch
+  %indvar.innermost = phi i64 [ %indvar.innermost.next, %innermost.latch ], [ 4, %middle.header ]
+  br label %innermost.body
+
+innermost.body:                                   ; preds = %innermost.header
+  %arrayidx9.i = getelementptr inbounds [1 x [6 x i32]], [1 x [6 x i32]]* @d, i64 0, i64 %indvar.innermost, i64 %indvar.middle
+  store i32 0, i32* %arrayidx9.i, align 4
+  br label %innermost.latch
+
+innermost.latch:                                  ; preds = %innermost.body
+  %indvar.innermost.next = add nsw i64 %indvar.innermost, 1
+  %tobool5.i = icmp eq i64 %indvar.innermost.next, %indvar.middle.wide ; use of the middle-header def in the innermost latch
+  br i1 %tobool5.i, label %middle.latch, label %innermost.header
+
+middle.latch:                                     ; preds = %innermost.latch
+  %indvar.middle.next = add nsw i64 %indvar.middle, -1
+  %tobool2.i = icmp eq i64 %indvar.middle.next, 0
+  br i1 %tobool2.i, label %outermost.latch, label %middle.header
+
+outermost.latch:                                  ; preds = %middle.latch, %outermost.header
+  %indvar.outermost.next = add nsw i32 %indvar.outermost, -5
+  %tobool.i = icmp eq i32 %indvar.outermost.next, 0
+  br i1 %tobool.i, label %outermost.exit, label %outermost.header
+
+outermost.exit:                                   ; preds = %outermost.latch
+  ret void
+}