Index: llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -6707,9 +6707,9 @@
   if (BI->isUnconditional())
     return std::nullopt;
   Value *TermCond = BI->getCondition();
-  if (!isa<ICmpInst>(TermCond) || !cast<ICmpInst>(TermCond)->isEquality()) {
-    LLVM_DEBUG(dbgs() << "Cannot fold on branching condition that is not an "
-                         "ICmpInst::eq / ICmpInst::ne\n");
+  if (!isa<ICmpInst>(TermCond)) {
+    LLVM_DEBUG(
+        dbgs() << "Cannot fold on branching condition that is not ICmpInst\n");
     return std::nullopt;
   }
   if (!TermCond->hasOneUse()) {
@@ -6718,6 +6718,15 @@
         << "Cannot replace terminating condition with more than one use\n");
     return std::nullopt;
   }
+  ICmpInst *TermCondCmp = cast<ICmpInst>(TermCond);
+  for (const Use &U : TermCondCmp->operands()) {
+    // PoisonValue derives from UndefValue, so this rejects both kinds.
+    if (isa<UndefValue>(U.get())) {
+      LLVM_DEBUG(dbgs() << "Cannot replace terminating condition that contains "
+                           "undef or poison operand\n");
+      return std::nullopt;
+    }
+  }
 
   // For `IsToFold`, a primary IV can be replaced by other affine AddRec when it
   // is only used by the terminating condition. To check for this, we may need
@@ -6933,11 +6942,17 @@
       BranchInst *BI = cast<BranchInst>(LoopLatch->getTerminator());
       ICmpInst *OldTermCond = cast<ICmpInst>(BI->getCondition());
       IRBuilder<> LatchBuilder(LoopLatch->getTerminator());
       // FIXME: We are adding a use of an IV here without account for poison safety.
       // This is incorrect.
-      Value *NewTermCond = LatchBuilder.CreateICmp(
-          OldTermCond->getPredicate(), LoopValue, TermValue,
-          "lsr_fold_term_cond.replaced_term_cond");
+      // The old predicate may now be relational (sle, slt, ...), so it cannot
+      // be reused. The new compare tests LoopValue against TermValue and must
+      // be true exactly when the branch takes its loop-exiting successor.
+      bool ExitOnTrue = !L->contains(BI->getSuccessor(0));
+      CmpInst::Predicate NewPredicate =
+          ExitOnTrue ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
+      Value *NewTermCond =
+          LatchBuilder.CreateICmp(NewPredicate, LoopValue, TermValue,
+                                  "lsr_fold_term_cond.replaced_term_cond");
 
       LLVM_DEBUG(dbgs() << "Old term-cond:\n"
                         << *OldTermCond << "\n"
Index: llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
===================================================================
--- llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
+++ llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
@@ -97,8 +97,8 @@
   ret void
 }
 
-define void @NonIcmpEqNe(ptr %a) {
-; CHECK: Cannot fold on branching condition that is not an ICmpInst::eq / ICmpInst::ne
+define void @NonIcmp(ptr %a) {
+; CHECK: Cannot fold on branching condition that is not ICmpInst
 entry:
   %uglygep = getelementptr i8, ptr %a, i64 84
   br label %for.body
@@ -110,7 +110,8 @@
   %lsr.iv.next = add nsw i64 %lsr.iv, -1
   %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4
   %exitcond.not = icmp sle i64 %lsr.iv.next, 0
-  br i1 %exitcond.not, label %for.end, label %for.body
+  %find.cond = and i1 %exitcond.not, 1
+  br i1 %find.cond, label %for.end, label %for.body
 
 for.end:                                          ; preds = %for.body
   ret void
@@ -214,3 +215,22 @@
 alac_pakt_block_offset.exit:                      ; preds = %for.body.i
   ret i64 0
 }
+
+define void @PoisonValue(ptr %a) {
+; CHECK: Cannot replace terminating condition that contains undef or poison operand
+entry:
+  %uglygep = getelementptr i8, ptr %a, i64 84
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
+  %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 379, %entry ]
+  store i32 1, ptr %lsr.iv1, align 4
+  %lsr.iv.next = add nsw i64 %lsr.iv, -1
+  %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4
+  %exitcond.not = icmp sle i64 %lsr.iv.next, poison
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
Index: llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
===================================================================
--- llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
+++ llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
@@ -172,3 +172,64 @@
   %tobool.not = icmp eq i32 %dec, 0
   br i1 %tobool.not, label %for.cond.cleanup, label %for.body
 }
+
+; Check that a non-equality terminating condition is folded correctly.
+; With SLE the loop runs 379 iterations, so the end offset is 84 + 4 * 379 = 1600.
+define void @IcmpSle(ptr %a) {
+; CHECK-LABEL: @IcmpSle(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[START:%.*]] = getelementptr i8, ptr %a, i32 84
+; CHECK-NEXT: [[END:%.*]] = getelementptr i8, ptr %a, i32 1600
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[START]], %entry ]
+; CHECK-NEXT: store i32 1, ptr [[LSR_IV]], align 4
+; CHECK-NEXT: [[LSR_IV_NEXT]] = getelementptr i8, ptr [[LSR_IV]], i32 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[LSR_IV_NEXT]], [[END]]
+; CHECK-NEXT: br i1 [[CMP]], label [[BR_END:%.*]], label %[[FOR_BODY]]
+entry:
+  %uglygep = getelementptr i8, ptr %a, i32 84
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
+  %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 379, %entry ]
+  store i32 1, ptr %lsr.iv1, align 4
+  %lsr.iv.next = add nsw i32 %lsr.iv, -1
+  %uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 4
+  %exitcond.not = icmp sle i32 %lsr.iv.next, 0
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; With SLT the loop runs 380 iterations, so the end offset is 84 + 4 * 380 = 1604.
+define void @IcmpSlt(ptr %a) {
+; CHECK-LABEL: @IcmpSlt(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[START:%.*]] = getelementptr i8, ptr %a, i32 84
+; CHECK-NEXT: [[END:%.*]] = getelementptr i8, ptr %a, i32 1604
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[START]], %entry ]
+; CHECK-NEXT: store i32 1, ptr [[LSR_IV]], align 4
+; CHECK-NEXT: [[LSR_IV_NEXT]] = getelementptr i8, ptr [[LSR_IV]], i32 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[LSR_IV_NEXT]], [[END]]
+; CHECK-NEXT: br i1 [[CMP]], label [[BR_END:%.*]], label %[[FOR_BODY]]
+entry:
+  %uglygep = getelementptr i8, ptr %a, i32 84
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
+  %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 379, %entry ]
+  store i32 1, ptr %lsr.iv1, align 4
+  %lsr.iv.next = add nsw i32 %lsr.iv, -1
+  %uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 4
+  %exitcond.not = icmp slt i32 %lsr.iv.next, 0
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}