Index: llvm/lib/Transforms/Scalar/LoopFlatten.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopFlatten.cpp +++ llvm/lib/Transforms/Scalar/LoopFlatten.cpp @@ -97,9 +97,18 @@ // Holds the old/narrow induction phis, i.e. the Phis before IV widening has // been applied. This bookkeeping is used so we can skip some checks on these // phi nodes. - SmallPtrSet OldInductionPHIs; + PHINode *NarrowInnerInductionPHI = nullptr; + PHINode *NarrowOuterInductionPHI = nullptr; + bool IsTriviallyDeletedPhi = false; FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL) {}; + + bool isNarrowInductionPhi(PHINode *Phi) { + // This can't be the narrow phi if we haven't widened the IV first. + if (!Widened) + return false; + return NarrowInnerInductionPHI == Phi || NarrowOuterInductionPHI == Phi; + } }; static bool @@ -268,7 +277,7 @@ // them specially when doing the transformation. if (&InnerPHI == FI.InnerInductionPHI) continue; - if (FI.Widened && FI.OldInductionPHIs.count(&InnerPHI)) + if (FI.isNarrowInductionPhi(&InnerPHI)) continue; // Each inner loop PHI node must have two incoming values/blocks - one @@ -315,7 +324,7 @@ } for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) { - if (FI.Widened && FI.OldInductionPHIs.count(&OuterPHI)) + if (FI.isNarrowInductionPhi(&OuterPHI)) continue; if (!SafeOuterPHIs.count(&OuterPHI)) { LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump()); @@ -619,6 +628,8 @@ // Fix up PHI nodes that take values from the inner loop back-edge, which // we are about to remove. FI.InnerInductionPHI->removeIncomingValue(FI.InnerLoop->getLoopLatch()); + if (FI.Widened && !FI.IsTriviallyDeletedPhi) + FI.NarrowInnerInductionPHI->removeIncomingValue(FI.InnerLoop->getLoopLatch()); // The old Phi will be optimised away later, but for now we can't leave // leave it in an invalid state, so are updating them too. @@ -705,13 +716,11 @@ return true; }; - bool Deleted; - if (!CreateWideIV({FI.InnerInductionPHI, MaxLegalType, false }, Deleted)) + if (!CreateWideIV({FI.InnerInductionPHI, MaxLegalType, false }, + FI.IsTriviallyDeletedPhi)) return false; - // If the inner Phi node cannot be trivially deleted, we need to at least - // bring it in a consistent state. - if (!Deleted) - FI.InnerInductionPHI->removeIncomingValue(FI.InnerLoop->getLoopLatch()); + + bool Deleted; if (!CreateWideIV({FI.OuterInductionPHI, MaxLegalType, false }, Deleted)) return false; @@ -719,8 +728,8 @@ FI.Widened = true; // Save the old/narrow induction phis, which we need to ignore in CheckPHIs. - FI.OldInductionPHIs.insert(FI.InnerInductionPHI); - FI.OldInductionPHIs.insert(FI.OuterInductionPHI); + FI.NarrowInnerInductionPHI = FI.InnerInductionPHI; + FI.NarrowOuterInductionPHI = FI.OuterInductionPHI; // After widening, rediscover all the loop components. return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI); Index: llvm/test/Transforms/LoopFlatten/widen-iv2.ll =================================================================== --- llvm/test/Transforms/LoopFlatten/widen-iv2.ll +++ llvm/test/Transforms/LoopFlatten/widen-iv2.ll @@ -37,7 +37,7 @@ ; CHECK-NEXT: br label [[FOR_BODY3_US:%.*]] ; CHECK: for.body3.us: ; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY3_US]] ], [ 0, [[FOR_COND1_PREHEADER_US]] ] -; CHECK-NEXT: [[J_014_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ] +; CHECK-NEXT: [[J_014_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INC_US:%.*]], [[FOR_BODY3_US]] ] ; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[INDVAR]], [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[J_014_US]] to i64 ; CHECK-NEXT: [[TMP9:%.*]] = add nsw i64 [[TMP8]], [[TMP5]] @@ -46,7 +46,7 @@ ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 [[TMP7]] ; CHECK-NEXT: store i32 32, i32* [[ARRAYIDX_US]], align 4 ; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[INC_US:%.*]] = add nuw nsw i32 [[J_014_US]], 1 +; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 [[J_014_US]], 1 ; CHECK-NEXT: [[CMP2_US:%.*]] = icmp slt i64 [[INDVAR_NEXT]], [[TMP1]] ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY3_US]], label [[FOR_COND1_FOR_INC4_CRIT_EDGE_US]] ; CHECK: for.cond1.for.inc4_crit_edge.us: