diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -1542,16 +1542,26 @@ auto AnotherOpExtKind = ExtKind; // Check that all uses are either s/zext, or narrow def (in case of we are - // widening the IV increment). + // widening the IV increment), or single-input LCSSA Phis. SmallVector ExtUsers; + SmallVector LCSSAPhiUsers; for (Use &U : NarrowUse->uses()) { - if (U.getUser() == NarrowDef) + Instruction *User = cast(U.getUser()); + if (User == NarrowDef) continue; - Instruction *User = nullptr; + if (!L->contains(User)) { + auto *LCSSAPhi = cast(User); + // Make sure there is only 1 input, so that we don't have to split + // critical edges. + if (LCSSAPhi->getNumOperands() != 1) + return false; + LCSSAPhiUsers.push_back(LCSSAPhi); + continue; + } if (ExtKind == SignExtended) - User = dyn_cast(U.getUser()); + User = dyn_cast(User); else - User = dyn_cast(U.getUser()); + User = dyn_cast(User); if (!User || User->getType() != WideType) return false; ExtUsers.push_back(User); @@ -1630,6 +1640,21 @@ User->replaceAllUsesWith(WideBO); DeadInsts.emplace_back(User); } + + for (PHINode *User : LCSSAPhiUsers) { + assert(User->getNumOperands() == 1 && "Checked before!"); + Builder.SetInsertPoint(User); + auto *WidePN = + Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide"); + BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor(); + assert(LoopExitingBlock && L->contains(LoopExitingBlock) && + "Not a LCSSA Phi?"); + WidePN->addIncoming(WideBO, LoopExitingBlock); + Builder.SetInsertPoint(User->getParent()->getFirstNonPHI()); + auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType()); + User->replaceAllUsesWith(TruncPN); + DeadInsts.emplace_back(User); + } return true; } diff --git a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll --- a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll +++ b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll @@ -697,20 +697,18 @@ ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[FOO:%.*]] = add i32 [[TMP1]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[BACKEDGE]] ; CHECK: backedge: -; CHECK-NEXT: [[INDEX:%.*]] = zext i32 [[FOO]] to i64 -; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[TMP1]] ; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4 -; CHECK-NEXT: [[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q]], align 4 +; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q:%.*]], align 4 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]] ; CHECK: exit: -; CHECK-NEXT: ret i32 -1 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 -1 to i32 +; CHECK-NEXT: ret i32 [[TMP2]] ; CHECK: failure: ; CHECK-NEXT: unreachable ; @@ -750,24 +748,23 @@ ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[FOO:%.*]] = add i32 [[TMP1]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[BACKEDGE]] ; CHECK: backedge: -; CHECK-NEXT: [[INDEX:%.*]] = zext i32 [[FOO]] to i64 -; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[TMP1]] ; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4 -; CHECK-NEXT: [[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q]], align 4 +; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q:%.*]], align 4 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]] ; CHECK: exit: -; CHECK-NEXT: call void @use(i32 -1) -; CHECK-NEXT: ret i32 -1 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 -1 to i32 +; CHECK-NEXT: call void @use(i32 [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP2]] ; CHECK: failure: -; CHECK-NEXT: [[FOO_LCSSA1:%.*]] = phi i32 [ [[FOO]], [[BACKEDGE]] ] -; CHECK-NEXT: call void @use(i32 [[FOO_LCSSA1]]) +; CHECK-NEXT: [[FOO_LCSSA1_WIDE:%.*]] = phi i64 [ [[TMP1]], [[BACKEDGE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[FOO_LCSSA1_WIDE]] to i32 +; CHECK-NEXT: call void @use(i32 [[TMP3]]) ; CHECK-NEXT: unreachable ; entry: