[LSR] Fold terminating conditions that use any ICmp predicate

The term-fold legality check in LoopStrengthReduce.cpp no longer requires an
eq/ne compare; any ICmpInst is accepted. The replacement condition is always
emitted as `icmp eq` against the expanded terminating value, and the latch
branch successors are swapped when successor 0 is the loop header, so that the
true edge of the new compare leaves the loop.

Index: llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -6707,9 +6707,9 @@
   if (BI->isUnconditional())
     return std::nullopt;
   Value *TermCond = BI->getCondition();
-  if (!isa<ICmpInst>(TermCond) || !cast<ICmpInst>(TermCond)->isEquality()) {
-    LLVM_DEBUG(dbgs() << "Cannot fold on branching condition that is not an "
-                         "ICmpInst::eq / ICmpInst::ne\n");
+  if (!isa<ICmpInst>(TermCond)) {
+    LLVM_DEBUG(
+        dbgs() << "Cannot fold on branching condition that is not ICmpInst\n");
     return std::nullopt;
   }
   if (!TermCond->hasOneUse()) {
@@ -6935,9 +6935,12 @@
       IRBuilder<> LatchBuilder(LoopLatch->getTerminator());
       // FIXME: We are adding a use of an IV here without account for poison safety.
       // This is incorrect.
-      Value *NewTermCond = LatchBuilder.CreateICmp(
-          OldTermCond->getPredicate(), LoopValue, TermValue,
-          "lsr_fold_term_cond.replaced_term_cond");
+      Value *NewTermCond =
+          LatchBuilder.CreateICmp(CmpInst::ICMP_EQ, LoopValue, TermValue,
+                                  "lsr_fold_term_cond.replaced_term_cond");
+      // The new compare is true on loop exit, so make the exit successor 0.
+      if (BI->getSuccessor(0) == L->getHeader())
+        BI->swapSuccessors();
 
       LLVM_DEBUG(dbgs() << "Old term-cond:\n"
                         << *OldTermCond << "\n"
Index: llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
===================================================================
--- llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
+++ llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
@@ -97,8 +97,8 @@
   ret void
 }
 
-define void @NonIcmpEqNe(ptr %a) {
-; CHECK: Cannot fold on branching condition that is not an ICmpInst::eq / ICmpInst::ne
+define void @NonIcmp(ptr %a) {
+; CHECK: Cannot fold on branching condition that is not ICmpInst
 entry:
   %uglygep = getelementptr i8, ptr %a, i64 84
   br label %for.body
@@ -110,7 +110,8 @@
   %lsr.iv.next = add nsw i64 %lsr.iv, -1
   %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4
   %exitcond.not = icmp sle i64 %lsr.iv.next, 0
-  br i1 %exitcond.not, label %for.end, label %for.body
+  %find.cond = and i1 %exitcond.not, 1
+  br i1 %find.cond, label %for.end, label %for.body
 
 for.end:                                          ; preds = %for.body
   ret void
Index: llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
===================================================================
--- llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
+++ llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
@@ -172,3 +172,212 @@
   %tobool.not = icmp eq i32 %dec, 0
   br i1 %tobool.not, label %for.cond.cleanup, label %for.body
 }
+
+; Check that a terminating condition with a non-equality predicate is folded.
+; With SLE the loop exits after 379 iterations: offset = 84 + 4 * 379 = 1600.
+define void @IcmpSle(ptr %a) {
+; CHECK-LABEL: @IcmpSle(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i32 1600
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    store i32 1, ptr [[LSR_IV1]], align 4
+; CHECK-NEXT:    [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4
+; CHECK-NEXT:    [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]]
+; CHECK-NEXT:    br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %uglygep = getelementptr i8, ptr %a, i32 84
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
+  %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 379, %entry ]
+  store i32 1, ptr %lsr.iv1, align 4
+  %lsr.iv.next = add nsw i32 %lsr.iv, -1
+  %uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 4
+  %exitcond.not = icmp sle i32 %lsr.iv.next, 0
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; With SLT the loop exits after 380 iterations: offset = 84 + 4 * 380 = 1604.
+define void @IcmpSlt(ptr %a) {
+; CHECK-LABEL: @IcmpSlt(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i32 1604
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    store i32 1, ptr [[LSR_IV1]], align 4
+; CHECK-NEXT:    [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4
+; CHECK-NEXT:    [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]]
+; CHECK-NEXT:    br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %uglygep = getelementptr i8, ptr %a, i32 84
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
+  %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 379, %entry ]
+  store i32 1, ptr %lsr.iv1, align 4
+  %lsr.iv.next = add nsw i32 %lsr.iv, -1
+  %uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 4
+  %exitcond.not = icmp slt i32 %lsr.iv.next, 0
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Swapped operands and branch targets: one iteration, offset = 84 + 4 = 88.
+define void @IcmpSgt(ptr %a) {
+; CHECK-LABEL: @IcmpSgt(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i32 88
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    store i32 1, ptr [[LSR_IV1]], align 4
+; CHECK-NEXT:    [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4
+; CHECK-NEXT:    [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]]
+; CHECK-NEXT:    br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %uglygep = getelementptr i8, ptr %a, i32 84
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
+  %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 379, %entry ]
+  store i32 1, ptr %lsr.iv1, align 4
+  %lsr.iv.next = add nsw i32 %lsr.iv, -1
+  %uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 4
+  %exitcond.not = icmp sgt i32 0, %lsr.iv.next
+  br i1 %exitcond.not, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Inverted predicate and branch targets: 379 iterations, offset = 1600.
+define void @IcmpSgt2(ptr %a) {
+; CHECK-LABEL: @IcmpSgt2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i32 1600
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    store i32 1, ptr [[LSR_IV1]], align 4
+; CHECK-NEXT:    [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4
+; CHECK-NEXT:    [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]]
+; CHECK-NEXT:    br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %uglygep = getelementptr i8, ptr %a, i32 84
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
+  %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 379, %entry ]
+  store i32 1, ptr %lsr.iv1, align 4
+  %lsr.iv.next = add nsw i32 %lsr.iv, -1
+  %uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 4
+  %exitcond.not = icmp sgt i32 %lsr.iv.next, 0
+  br i1 %exitcond.not, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; The terminating compare is in a latch block separate from the loop header.
+define void @SeveralLoopLatch(ptr %a) {
+; CHECK-LABEL: @SeveralLoopLatch(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i32 1600
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[ANOTHER_BRANCH:%.*]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    store i32 1, ptr [[LSR_IV1]], align 4
+; CHECK-NEXT:    [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4
+; CHECK-NEXT:    br label [[ANOTHER_BRANCH]]
+; CHECK:       another.branch:
+; CHECK-NEXT:    [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]]
+; CHECK-NEXT:    br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %uglygep = getelementptr i8, ptr %a, i32 84
+  br label %for.body
+
+for.body:                                         ; preds = %another.branch, %entry
+  %lsr.iv1 = phi ptr [ %uglygep2, %another.branch ], [ %uglygep, %entry ]
+  %lsr.iv = phi i32 [ %lsr.iv.next, %another.branch ], [ 379, %entry ]
+  store i32 1, ptr %lsr.iv1, align 4
+  %lsr.iv.next = add nsw i32 %lsr.iv, -1
+  %uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 4
+  br label %another.branch
+
+another.branch:
+  %exitcond.not = icmp sgt i32 %lsr.iv.next, 0
+  br i1 %exitcond.not, label %for.body, label %for.end
+
+for.end:                                          ; preds = %another.branch
+  ret void
+}
+
+; Same as SeveralLoopLatch with the predicate and branch targets inverted.
+define void @SeveralLoopLatch2(ptr %a) {
+; CHECK-LABEL: @SeveralLoopLatch2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i32 1600
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[ANOTHER_BRANCH:%.*]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    store i32 1, ptr [[LSR_IV1]], align 4
+; CHECK-NEXT:    [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4
+; CHECK-NEXT:    br label [[ANOTHER_BRANCH]]
+; CHECK:       another.branch:
+; CHECK-NEXT:    [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]]
+; CHECK-NEXT:    br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %uglygep = getelementptr i8, ptr %a, i32 84
+  br label %for.body
+
+for.body:                                         ; preds = %another.branch, %entry
+  %lsr.iv1 = phi ptr [ %uglygep2, %another.branch ], [ %uglygep, %entry ]
+  %lsr.iv = phi i32 [ %lsr.iv.next, %another.branch ], [ 379, %entry ]
+  store i32 1, ptr %lsr.iv1, align 4
+  %lsr.iv.next = add nsw i32 %lsr.iv, -1
+  %uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 4
+  br label %another.branch
+
+another.branch:
+  %exitcond.not = icmp sle i32 %lsr.iv.next, 0
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:                                          ; preds = %another.branch
+  ret void
+}