diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -1541,10 +1541,14 @@ bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap(); auto AnotherOpExtKind = ExtKind; - // Check that all uses are either s/zext, or narrow def (in case of we are - // widening the IV increment), or single-input LCSSA Phis. + // Check that all uses are either: + // - narrow def (in case of we are widening the IV increment); + // - single-input LCSSA Phis; + // - comparison of the chosen type; + // - extend of the chosen type (raison d'etre). SmallVector ExtUsers; SmallVector LCSSAPhiUsers; + SmallVector ICmpUsers; for (Use &U : NarrowUse->uses()) { Instruction *User = cast(U.getUser()); if (User == NarrowDef) @@ -1558,6 +1562,19 @@ LCSSAPhiUsers.push_back(LCSSAPhi); continue; } + if (auto *ICmp = dyn_cast(User)) { + auto Pred = ICmp->getPredicate(); + // We have 3 types of predicates: signed, unsigned and equality + // predicates. For equality, it's legal to widen icmp for either sign and + // zero extend. For sign extend, we can also do so for signed predicates, + // likeweise for zero extend we can widen icmp for unsigned predicates. + if (ExtKind == ZeroExtended && ICmpInst::isSigned(Pred)) + return false; + if (ExtKind == SignExtended && ICmpInst::isUnsigned(Pred)) + return false; + ICmpUsers.push_back(ICmp); + continue; + } if (ExtKind == SignExtended) User = dyn_cast(User); else @@ -1655,6 +1672,26 @@ User->replaceAllUsesWith(TruncPN); DeadInsts.emplace_back(User); } + + for (ICmpInst *User : ICmpUsers) { + Builder.SetInsertPoint(User); + auto ExtendedOp = [&](Value * V)->Value * { + if (V == NarrowUse) + return WideBO; + if (ExtKind == ZeroExtended) + return Builder.CreateZExt(V, WideBO->getType()); + else + return Builder.CreateSExt(V, WideBO->getType()); + }; + auto Pred = User->getPredicate(); + auto *LHS = ExtendedOp(User->getOperand(0)); + auto *RHS = ExtendedOp(User->getOperand(1)); + auto *WideCmp = + Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide"); + User->replaceAllUsesWith(WideCmp); + DeadInsts.emplace_back(User); + } + return true; } diff --git a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll --- a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll +++ b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll @@ -795,37 +795,36 @@ unreachable } -; TODO: We can widen here despite the icmp user of %foo in guarded block. define i32 @test16_unsigned_pos1(i32 %start, i32* %p, i32* %q, i32 %x) { ; CHECK-LABEL: @test16_unsigned_pos1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[X:%.*]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[FOO:%.*]] = add i32 [[TMP2]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[GUARDED:%.*]] ; CHECK: guarded: -; CHECK-NEXT: [[ICMP_USER3:%.*]] = icmp ult i32 [[TMP1]], [[X:%.*]] -; CHECK-NEXT: br i1 [[ICMP_USER3]], label [[BACKEDGE]], label [[SIDE_EXIT:%.*]] +; CHECK-NEXT: [[ICMP_USER_WIDE4:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[ICMP_USER_WIDE4]], label [[BACKEDGE]], label [[SIDE_EXIT:%.*]] ; CHECK: backedge: -; CHECK-NEXT: [[INDEX:%.*]] = zext i32 [[FOO]] to i64 -; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[TMP3]] ; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4 -; CHECK-NEXT: [[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q]], align 4 +; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q:%.*]], align 4 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]] ; CHECK: exit: -; CHECK-NEXT: call void @use(i32 -1) -; CHECK-NEXT: ret i32 -1 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 -1 to i32 +; CHECK-NEXT: call void @use(i32 [[TMP4]]) +; CHECK-NEXT: ret i32 [[TMP4]] ; CHECK: failure: -; CHECK-NEXT: [[FOO_LCSSA2:%.*]] = phi i32 [ [[FOO]], [[BACKEDGE]] ] -; CHECK-NEXT: call void @use(i32 [[FOO_LCSSA2]]) +; CHECK-NEXT: [[FOO_LCSSA2_WIDE:%.*]] = phi i64 [ [[TMP3]], [[BACKEDGE]] ] +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[FOO_LCSSA2_WIDE]] to i32 +; CHECK-NEXT: call void @use(i32 [[TMP5]]) ; CHECK-NEXT: unreachable ; CHECK: side_exit: ; CHECK-NEXT: ret i32 0