Index: llvm/lib/Transforms/Scalar/LoopFlatten.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopFlatten.cpp +++ llvm/lib/Transforms/Scalar/LoopFlatten.cpp @@ -89,15 +89,22 @@ BranchInst *InnerBranch = nullptr; BranchInst *OuterBranch = nullptr; SmallPtrSet LinearIVUses; + + // Holds the old/narrow induction phi, i.e. the Phi before IV widening has + // been applied. This bookkeeping is used so we can skip some checks on these + // phi nodes. SmallPtrSet InnerPHIsToTransform; + PHINode *NarrowOuterInductionPHI = nullptr; // Whether this holds the flatten info before or after widening. bool Widened = false; - // Holds the old/narrow induction phis, i.e. the Phis before IV widening has - // been applied. This bookkeeping is used so we can skip some checks on these - // phi nodes. - SmallPtrSet OldInductionPHIs; + bool isNarrowInductionPhi(PHINode *Phi) { + // This can't be the narrow phi if we haven't widened the IV first. + if (!Widened) + return false; + return InnerPHIsToTransform.count(Phi) || NarrowOuterInductionPHI == Phi; + } FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL) {}; }; @@ -268,7 +275,7 @@ // them specially when doing the transformation. if (&InnerPHI == FI.InnerInductionPHI) continue; - if (FI.Widened && FI.OldInductionPHIs.count(&InnerPHI)) + if (FI.isNarrowInductionPhi(&InnerPHI)) continue; // Each inner loop PHI node must have two incoming values/blocks - one @@ -315,7 +322,7 @@ } for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) { - if (FI.Widened && FI.OldInductionPHIs.count(&OuterPHI)) + if (FI.isNarrowInductionPhi(&OuterPHI)) continue; if (!SafeOuterPHIs.count(&OuterPHI)) { LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump()); @@ -705,22 +712,22 @@ return true; }; - bool Deleted; - if (!CreateWideIV({FI.InnerInductionPHI, MaxLegalType, false }, Deleted)) + bool TriviallyDeletedPhi; + if (!CreateWideIV({FI.InnerInductionPHI, MaxLegalType, false }, + TriviallyDeletedPhi)) return false; - // If the inner Phi node cannot be trivially deleted, we need to at least - // bring it in a consistent state. - if (!Deleted) - FI.InnerInductionPHI->removeIncomingValue(FI.InnerLoop->getLoopLatch()); - if (!CreateWideIV({FI.OuterInductionPHI, MaxLegalType, false }, Deleted)) + + bool Dummy; + if (!CreateWideIV({FI.OuterInductionPHI, MaxLegalType, false }, Dummy)) return false; assert(Widened && "Widened IV expected"); FI.Widened = true; // Save the old/narrow induction phis, which we need to ignore in CheckPHIs. - FI.OldInductionPHIs.insert(FI.InnerInductionPHI); - FI.OldInductionPHIs.insert(FI.OuterInductionPHI); + if (!TriviallyDeletedPhi) + FI.InnerPHIsToTransform.insert(FI.InnerInductionPHI); + FI.NarrowOuterInductionPHI = FI.OuterInductionPHI; // After widening, rediscover all the loop components. return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI); Index: llvm/test/Transforms/LoopFlatten/widen-iv2.ll =================================================================== --- llvm/test/Transforms/LoopFlatten/widen-iv2.ll +++ llvm/test/Transforms/LoopFlatten/widen-iv2.ll @@ -37,7 +37,7 @@ ; CHECK-NEXT: br label [[FOR_BODY3_US:%.*]] ; CHECK: for.body3.us: ; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY3_US]] ], [ 0, [[FOR_COND1_PREHEADER_US]] ] -; CHECK-NEXT: [[J_014_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ] +; CHECK-NEXT: [[J_014_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INC_US:%.*]], [[FOR_BODY3_US]] ] ; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[INDVAR]], [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[J_014_US]] to i64 ; CHECK-NEXT: [[TMP9:%.*]] = add nsw i64 [[TMP8]], [[TMP5]] @@ -46,7 +46,7 @@ ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 [[TMP7]] ; CHECK-NEXT: store i32 32, i32* [[ARRAYIDX_US]], align 4 ; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[INC_US:%.*]] = add nuw nsw i32 [[J_014_US]], 1 +; CHECK-NEXT: [[INC_US]] = add nuw nsw i32 [[J_014_US]], 1 ; CHECK-NEXT: [[CMP2_US:%.*]] = icmp slt i64 [[INDVAR_NEXT]], [[TMP1]] ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY3_US]], label [[FOR_COND1_FOR_INC4_CRIT_EDGE_US]] ; CHECK: for.cond1.for.inc4_crit_edge.us: