Index: llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -3161,7 +3161,7 @@
 void LSRInstance::FinalizeChain(IVChain &Chain) {
   assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
   LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
-  
+
   for (const IVInc &Inc : Chain) {
     LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
     auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
@@ -6304,7 +6304,7 @@
       if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
         Op.appendToVector(DestExpr);
         continue;
-      } 
+      }
 
       DestExpr.push_back(dwarf::DW_OP_LLVM_arg);
       // `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV,
@@ -6707,9 +6707,8 @@
   if (BI->isUnconditional())
     return std::nullopt;
   Value *TermCond = BI->getCondition();
-  if (!isa<ICmpInst>(TermCond) || !cast<ICmpInst>(TermCond)->isEquality()) {
-    LLVM_DEBUG(dbgs() << "Cannot fold on branching condition that is not an "
-                         "ICmpInst::eq / ICmpInst::ne\n");
+  if (!isa<ICmpInst>(TermCond)) {
+    LLVM_DEBUG(dbgs() << "Cannot fold on branching condition that is not ICmp\n");
     return std::nullopt;
   }
   if (!TermCond->hasOneUse()) {
@@ -6718,6 +6717,11 @@
         << "Cannot replace terminating condition with more than one use\n");
     return std::nullopt;
   }
+  if (!isGuaranteedNotToBeUndefOrPoison(TermCond)) {
+    LLVM_DEBUG(
+        dbgs() << "Cannot replace terminating condition that contains poison use\n");
+    return std::nullopt;
+  }
 
   // For `IsToFold`, a primary IV can be replaced by other affine AddRec when it
   // is only used by the terminating condition. To check for this, we may need
@@ -6933,10 +6937,13 @@
       BranchInst *BI = cast<BranchInst>(LoopLatch->getTerminator());
       ICmpInst *OldTermCond = cast<ICmpInst>(BI->getCondition());
       IRBuilder<> LatchBuilder(LoopLatch->getTerminator());
-      // FIXME: We are adding a use of an IV here without account for poison safety.
-      // This is incorrect.
+
+      // NOTE(review): mapping every non-EQ predicate to ICMP_NE ignores which
+      // successor of BI leaves the loop; for an exit-on-true condition such as
+      // `icmp sle` the rewritten compare looks inverted -- confirm before landing.
+      auto NewPredicate = (OldTermCond->getPredicate() == CmpInst::ICMP_EQ) ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
       Value *NewTermCond = LatchBuilder.CreateICmp(
-          OldTermCond->getPredicate(), LoopValue, TermValue,
+          NewPredicate, LoopValue, TermValue,
           "lsr_fold_term_cond.replaced_term_cond");
 
       LLVM_DEBUG(dbgs() << "Old term-cond:\n"
Index: llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
===================================================================
--- llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
+++ llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
@@ -97,25 +97,6 @@
   ret void
 }
 
-define void @NonIcmpEqNe(ptr %a) {
-; CHECK: Cannot fold on branching condition that is not an ICmpInst::eq / ICmpInst::ne
-entry:
-  %uglygep = getelementptr i8, ptr %a, i64 84
-  br label %for.body
-
-for.body: ; preds = %for.body, %entry
-  %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
-  %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 379, %entry ]
-  store i32 1, ptr %lsr.iv1, align 4
-  %lsr.iv.next = add nsw i64 %lsr.iv, -1
-  %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4
-  %exitcond.not = icmp sle i64 %lsr.iv.next, 0
-  br i1 %exitcond.not, label %for.end, label %for.body
-
-for.end: ; preds = %for.body
-  ret void
-}
-
 define void @TermCondMoreThanOneUse(ptr %a) {
 ;CHECK: Cannot replace terminating condition with more than one use
 entry:
Index: llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
===================================================================
--- llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
+++ llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
@@ -172,3 +172,64 @@
   %tobool.not = icmp eq i32 %dec, 0
   br i1 %tobool.not, label %for.cond.cleanup, label %for.body
 }
+
+; Check that a non-equality (sle) terminating condition is folded.
+; With sle the loop runs 379 iterations, so the end offset is 84 + 379 * 4 = 1600.
+define void @IcmpSle(ptr %a) {
+; CHECK-LABEL: @IcmpSle(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[START:%.*]] = getelementptr i8, ptr %a, i64 84
+; CHECK-NEXT: [[END:%.*]] = getelementptr i8, ptr %a, i64 1600
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START]], %entry ]
+; CHECK-NEXT: store i32 1, ptr [[LSR_IV]], align 4
+; CHECK-NEXT: [[LSR_IV_NEXT]] = getelementptr i8, ptr [[LSR_IV]], i64 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[LSR_IV_NEXT]], [[END]]
+; CHECK-NEXT: br i1 [[CMP]], label [[BR_END:%.*]], label [[FOR_BODY]]
+entry:
+  %uglygep = getelementptr i8, ptr %a, i64 84
+  br label %for.body
+
+for.body: ; preds = %for.body, %entry
+  %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
+  %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 379, %entry ]
+  store i32 1, ptr %lsr.iv1, align 4
+  %lsr.iv.next = add nsw i64 %lsr.iv, -1
+  %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4
+  %exitcond.not = icmp sle i64 %lsr.iv.next, 0
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+  ret void
+}
+
+; With slt the loop runs 380 iterations, so the end offset is 84 + 380 * 4 = 1604.
+define void @IcmpSlt(ptr %a) {
+; CHECK-LABEL: @IcmpSlt(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[START:%.*]] = getelementptr i8, ptr %a, i64 84
+; CHECK-NEXT: [[END:%.*]] = getelementptr i8, ptr %a, i64 1604
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START]], %entry ]
+; CHECK-NEXT: store i32 1, ptr [[LSR_IV]], align 4
+; CHECK-NEXT: [[LSR_IV_NEXT]] = getelementptr i8, ptr [[LSR_IV]], i64 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[LSR_IV_NEXT]], [[END]]
+; CHECK-NEXT: br i1 [[CMP]], label [[BR_END:%.*]], label [[FOR_BODY]]
+entry:
+  %uglygep = getelementptr i8, ptr %a, i64 84
+  br label %for.body
+
+for.body: ; preds = %for.body, %entry
+  %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
+  %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 379, %entry ]
+  store i32 1, ptr %lsr.iv1, align 4
+  %lsr.iv.next = add nsw i64 %lsr.iv, -1
+  %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4
+  %exitcond.not = icmp slt i64 %lsr.iv.next, 0
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+  ret void
+}