Index: llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1449,25 +1449,55 @@
       std::swap(LHS, RHS);
     }
 
-    // Match (icmp signed-cond zext, RHS)
-    Value *LHSOp = nullptr;
-    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
-      continue;
+    if (ICmp->isSigned()) {
+      // Turn A - IV > B --> IV < A - B. This is only sound when none of the
+      // subtractions can wrap in the signed sense, so require A, B and IV to
+      // all be known non-negative.
+      Value *LHSOp = nullptr, *RHSOp = nullptr;
+      if (match(LHS, m_Sub(m_Value(LHSOp), m_Value(RHSOp)))) {
+        auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(RHSOp));
+        auto *LHSOpS = SE->getSCEV(LHSOp);
+        auto *RHSS = SE->getSCEV(RHS);
+        if (AR && AR->getLoop() == L && AR->isAffine() &&
+            SE->isKnownPositive(AR->getStepRecurrence(*SE)) &&
+            SE->isKnownNonNegative(AR) && L->isLoopInvariant(LHSOp) &&
+            SE->isKnownNonNegative(LHSOpS) && SE->isKnownNonNegative(RHSS)) {
+          // The operands may have been swapped above to put the invariant
+          // one on the right:
+          //   (A - IV) pred B  <=>  IV swapped-pred (A - B)
+          //   B pred (A - IV)  <=>  IV pred (A - B)
+          // so only swap the predicate if the sub was operand 0.
+          const bool SubWasOp0 = ICmp->getOperand(0) == LHS;
+          IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
+          RHS = Builder.CreateSub(LHSOp, RHS, "canonical.rhs");
+          ICmp->setOperand(0, LHS = RHSOp);
+          ICmp->setOperand(1, RHS);
+          if (SubWasOp0)
+            ICmp->setPredicate(ICmp->getSwappedPredicate());
+          // Note: No SCEV invalidation needed. We've rewritten the compare,
+          // but have not changed exit counts, or the value it produces.
+          Changed = true;
+        }
+      }
 
-    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
-    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
-    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
-    auto FullCR = ConstantRange::getFull(InnerBitWidth);
-    FullCR = FullCR.zeroExtend(OuterBitWidth);
-    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
-    if (FullCR.contains(RHSCR)) {
-      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
-      // replace the signed condition with the unsigned version.
-      ICmp->setPredicate(ICmp->getUnsignedPredicate());
-      Changed = true;
-      // Note: No SCEV invalidation needed. We've changed the predicate, but
-      // have not changed exit counts, or the values produced by the compare.
-      continue;
+      // Match (icmp signed-cond zext, RHS)
+      if (match(LHS, m_ZExt(m_Value(LHSOp)))) {
+        const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
+        const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
+        const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
+        auto FullCR = ConstantRange::getFull(InnerBitWidth);
+        FullCR = FullCR.zeroExtend(OuterBitWidth);
+        auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
+        if (FullCR.contains(RHSCR)) {
+          // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
+          // replace the signed condition with the unsigned version.
+          ICmp->setPredicate(ICmp->getUnsignedPredicate());
+          Changed = true;
+          // Note: No SCEV invalidation needed. We've changed the predicate, but
+          // have not changed exit counts, or the values produced by the compare.
+          continue;
+        }
+      }
     }
   }
 
Index: llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
===================================================================
--- llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
+++ llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll
@@ -141,3 +141,55 @@
 exit:
   ret i32 %iv
 }
+
+; capacity - iv < 4 ---> iv > capacity - 4
+define i32 @test_03(ptr %p, ptr %capacity_p, ptr %num_elements_p) {
+; CHECK-LABEL: @test_03(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CAPACITY:%.*]] = load i32, ptr [[CAPACITY_P:%.*]], align 4, !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    [[NUM_ELEMENTS:%.*]] = load i32, ptr [[NUM_ELEMENTS_P:%.*]], align 4, !range [[RNG0]]
+; CHECK-NEXT:    [[CANONICAL_RHS:%.*]] = sub i32 [[CAPACITY]], 4
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[BYTES_TO_WRITE:%.*]] = sub nsw i32 [[CAPACITY]], [[IV]]
+; CHECK-NEXT:    [[CAPACITY_CHECK:%.*]] = icmp sgt i32 [[IV]], [[CANONICAL_RHS]]
+; CHECK-NEXT:    br i1 [[CAPACITY_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
+; CHECK:       backedge:
+; CHECK-NEXT:    [[EL_PTR:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[IV]]
+; CHECK-NEXT:    store i32 1, ptr [[EL_PTR]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4
+; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[NUM_ELEMENTS]]
+; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ]
+; CHECK-NEXT:    ret i32 [[IV_NEXT_LCSSA]]
+; CHECK:       out_of_bounds:
+; CHECK-NEXT:    ret i32 -1
+;
+entry:
+  %capacity = load i32, ptr %capacity_p, !range !0
+  %num_elements = load i32, ptr %num_elements_p, !range !0
+  br label %loop
+
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %backedge]
+  %bytes_to_write = sub i32 %capacity, %iv
+  %capacity_check = icmp slt i32 %bytes_to_write, 4
+  br i1 %capacity_check, label %out_of_bounds, label %backedge
+
+backedge:
+  %el.ptr = getelementptr i32, ptr %p, i32 %iv
+  store i32 1, ptr %el.ptr
+  %iv.next = add nuw nsw i32 %iv, 4
+  %loop_cond = icmp slt i32 %iv.next, %num_elements
+  br i1 %loop_cond, label %loop, label %exit
+
+exit:
+  ret i32 %iv.next
+
+out_of_bounds:
+  ret i32 -1
+}
+
+!0 = !{i32 1, i32 2147483648}