diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -717,22 +717,6 @@ return true; } -static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) { - for (PHINode &PHI : Block->phis()) { - // Reduction lcssa phi will have only 1 incoming block that from loop latch. - if (PHI.getNumIncomingValues() > 1) - return false; - Instruction *Ins = dyn_cast(PHI.getIncomingValue(0)); - if (!Ins) - return false; - // Incoming value for lcssa phi's in outer loop exit can only be inner loop - // exits lcssa phi else it would not be tightly nested. - if (!isa(Ins) && isOuterLoopExitBlock) - return false; - } - return true; -} - // This function indicates the current limitations in the transform as a result // of which we do not proceed. bool LoopInterchangeLegality::currentLimitations() { @@ -831,21 +815,6 @@ return true; } - // TODO: We only handle LCSSA PHI's corresponding to reduction for now. - BasicBlock *InnerExit = InnerLoop->getExitBlock(); - if (!containsSafePHI(InnerExit, false)) { - LLVM_DEBUG( - dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n"); - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NoLCSSAPHIOuterInner", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Only inner loops with LCSSA PHIs can be interchange " - "currently."; - }); - return true; - } - // TODO: Current limitation: Since we split the inner loop latch at the point // were induction variable is incremented (induction.next); We cannot have // more than 1 user of induction.next since it would result in broken code @@ -921,6 +890,28 @@ return false; } +// We currently only support LCSSA PHI nodes in the inner loop exit, if their +// users are either reduction PHIs or PHIs outside the outer loop (which means +// the we are only interested in the final value after the loop). +static bool +areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, + SmallPtrSetImpl &Reductions) { + BasicBlock *InnerExit = OuterL->getUniqueExitBlock(); + for (PHINode &PHI : InnerExit->phis()) { + // Reduction lcssa phi will have only 1 incoming block that from loop latch. + if (PHI.getNumIncomingValues() > 1) + return false; + if (any_of(PHI.users(), [&Reductions, OuterL](User *U) { + PHINode *PN = dyn_cast(U); + return !PN || (Reductions.find(PN) == Reductions.end() && + OuterL->contains(PN->getParent())); + })) { + return false; + } + } + return true; +} + // We currently support LCSSA PHI nodes in the outer loop exit, if their // incoming values do not come from the outer loop latch or if the // outer loop latch has a single predecessor. In that case, the value will @@ -928,7 +919,7 @@ // will still be true after interchanging. If we have multiple predecessor, // that may not be the case, e.g. because the outer loop latch may be executed // if the inner loop is not executed. -static bool areLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) { +static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) { BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock(); for (PHINode &PHI : LoopNestExit->phis()) { // FIXME: We currently are not able to detect floating point reductions @@ -1013,7 +1004,19 @@ return false; } - if (!areLoopExitPHIsSupported(OuterLoop, InnerLoop)) { + if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop, + OuterInnerReductions)) { + LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n"); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Found unsupported PHI node in loop exit."; + }); + return false; + } + + if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) { LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n"); ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI", diff --git a/llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll b/llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -loop-interchange -S < %s | FileCheck %s + +; Test cases for PR43473. + +; In the 2 test cases below, we have a LCSSA PHI in the inner loop exit, which +; is used in the outer loop latch. This is not supported. + +define void @test1() { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ] +; CHECK-NEXT: [[IDX:%.*]] = getelementptr inbounds double, double* undef, i64 [[OUTER_IV]] +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[OUTER_HEADER]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[IDX]], align 8 +; CHECK-NEXT: store double undef, double* [[IDX]], align 8 +; CHECK-NEXT: [[INNER_IV_NEXT]] = add nuw nsw i64 [[INNER_IV]], 1 +; CHECK-NEXT: br i1 false, label [[INNER]], label [[OUTER_LATCH]] +; CHECK: outer.latch: +; CHECK-NEXT: [[INC43_LCSSA_WIDE_US:%.*]] = phi i64 [ [[INNER_IV_NEXT]], [[INNER]] ] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INC43_LCSSA_WIDE_US]] to i32 +; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nsw i64 [[OUTER_IV]], 1 +; CHECK-NEXT: br i1 false, label [[OUTER_HEADER]], label [[OUTER_EXIT:%.*]] +; CHECK: outer.exit: +; CHECK-NEXT: ret void +; +entry: + br label %outer.header + +outer.header: ; preds = %for.cond26.for.end44_crit_edge.us, %entry + %outer.iv = phi i64 [ undef, %entry ], [ %outer.iv.next, %outer.latch ] + %idx = getelementptr inbounds double, double* undef, i64 %outer.iv + br label %inner + +inner: ; preds = %for.body28.us, %for.body25.us + %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner ] + %0 = load double, double* %idx, align 8 + store double undef, double* %idx, align 8 + %inner.iv.next = add nuw nsw i64 %inner.iv, 1 + br i1 undef, label %inner, label %outer.latch + +outer.latch: ; preds = %inner + %inc43.lcssa.wide.us = phi i64 [ %inner.iv.next, %inner ] + %1 = trunc i64 %inc43.lcssa.wide.us to i32 + %outer.iv.next = add nsw i64 %outer.iv, 1 + br i1 undef, label %outer.header, label %outer.exit + +outer.exit: ; preds = %for.cond26.for.end44_crit_edge.us + ret void +} + +; Same as @test1, but with a dedicated inner loop exit block. +define void @test2() { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ] +; CHECK-NEXT: [[IDX:%.*]] = getelementptr inbounds double, double* undef, i64 [[OUTER_IV]] +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[OUTER_HEADER]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[IDX]], align 8 +; CHECK-NEXT: store double undef, double* [[IDX]], align 8 +; CHECK-NEXT: [[INNER_IV_NEXT]] = add nuw nsw i64 [[INNER_IV]], 1 +; CHECK-NEXT: br i1 false, label [[INNER]], label [[INNER_EXIT:%.*]] +; CHECK: inner.exit: +; CHECK-NEXT: [[INC43_LCSSA_WIDE_US:%.*]] = phi i64 [ [[INNER_IV_NEXT]], [[INNER]] ] +; CHECK-NEXT: br label [[OUTER_LATCH]] +; CHECK: outer.latch: +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INC43_LCSSA_WIDE_US]] to i32 +; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nsw i64 [[OUTER_IV]], 1 +; CHECK-NEXT: br i1 false, label [[OUTER_HEADER]], label [[OUTER_EXIT:%.*]] +; CHECK: outer.exit: +; CHECK-NEXT: ret void +; +entry: + br label %outer.header + +outer.header: ; preds = %for.cond26.for.end44_crit_edge.us, %entry + %outer.iv = phi i64 [ undef, %entry ], [ %outer.iv.next, %outer.latch ] + %idx = getelementptr inbounds double, double* undef, i64 %outer.iv + br label %inner + +inner: ; preds = %for.body28.us, %for.body25.us + %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner ] + %0 = load double, double* %idx, align 8 + store double undef, double* %idx, align 8 + %inner.iv.next = add nuw nsw i64 %inner.iv, 1 + br i1 undef, label %inner, label %inner.exit + +inner.exit: + %inc43.lcssa.wide.us = phi i64 [ %inner.iv.next, %inner ] + br label %outer.latch + +outer.latch: ; preds = %inner + %1 = trunc i64 %inc43.lcssa.wide.us to i32 + %outer.iv.next = add nsw i64 %outer.iv, 1 + br i1 undef, label %outer.header, label %outer.exit + +outer.exit: ; preds = %for.cond26.for.end44_crit_edge.us + ret void +} +