Index: llvm/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LICM.cpp
+++ llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2564,33 +2564,82 @@
   if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS) || !LHS->hasOneUse())
     return false;
   Value *VariantOp, *InvariantOp;
-
-  // TODO: support sub.
-  if (!match(LHS, m_Add(m_Value(VariantOp), m_Value(InvariantOp))))
+  // If we have a sub, we represent it as "add" where the 2nd operand is
+  // negated.
+  bool NegateVariantOp = false, NegateInvariantOp = false;
+  if (match(LHS, m_Sub(m_Value(VariantOp), m_Value(InvariantOp))))
+    NegateInvariantOp = true;
+  else if (!match(LHS, m_Add(m_Value(VariantOp), m_Value(InvariantOp))))
     return false;
-
-  // Split off invariant argument from LHS.
-  if (L.isLoopInvariant(VariantOp))
+  if (L.isLoopInvariant(VariantOp)) {
     std::swap(VariantOp, InvariantOp);
+    std::swap(NegateVariantOp, NegateInvariantOp);
+  }
   if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
     return false;
   if (!cast<Instruction>(LHS)->hasNoSignedWrap())
     return false;
-  // Now, try to turn (VariantOp + InvariantOp < RHS) into (VariantOp <
-  // RHS - InvariantOp). It is only legal if we can prove that the
-  // subtraction won't overflow.
+  // Now we have the following pattern:
+  //   NVO * VariantOp + NIO * InvariantOp < RHS.
+  // Here:
+  //   NVO = pow(-1, NegateVariantOp),
+  //   NIO = pow(-1, NegateInvariantOp).
+  // The transforms we want to do are the following:
+  //
+  // Step 1: move all invariants to the right hand side of the inequality.
+  //   NVO * VariantOp < RHS + (-1) * NIO * InvariantOp.
+  //
+  // Step 2: get rid of NVO by multiplying both sides of the inequality by it
+  // (we may need to swap the predicate):
+  //   VariantOp < RHS * NVO + (-1) * NVO * NIO * InvariantOp.
+  //
+  // Finally:
+  // - We need to swap the predicate if NVO was -1 (i.e. NegateVariantOp was
+  //   true);
+  // - We need to take RHS with negative sign if NVO = -1 (i.e. NegateVariantOp
+  //   was true);
+  // - We need to take InvariantOp with negative sign if
+  //   ((-1) * NVO * NIO) = -1, i.e. if
+  //   (true ^ NegateVariantOp ^ NegateInvariantOp) was true.
+  //
+  // After that, just add RHS and InvariantOp with these signs.
+
+  bool ShouldSwapPredicate = NegateVariantOp;
+  bool ShouldNegateRHS = NegateVariantOp;
+  bool ShouldNegateInvariantOp = NegateVariantOp ^ NegateInvariantOp ^ true;
+  assert(!(ShouldNegateRHS && ShouldNegateInvariantOp) &&
+         "Cannot end up negating both operands");
+
+  // Make sure no overflow happens while computing the invariant sum.
   const SCEV *RHSS = SE.getSCEV(RHS);
-  const SCEV *InvariantOpS = SE.getNegativeSCEV(SE.getSCEV(InvariantOp));
+  const SCEV *InvariantOpS = SE.getSCEV(InvariantOp);
+  if (ShouldNegateRHS)
+    RHSS = SE.getNegativeSCEV(RHSS);
+  if (ShouldNegateInvariantOp)
+    InvariantOpS = SE.getNegativeSCEV(InvariantOpS);
   if (!SE.willNotOverflow(Instruction::BinaryOps::Add, /*Signed*/ true, RHSS,
                           InvariantOpS, /*CtxI*/ &I))
     return false;
+
+  if (ShouldSwapPredicate)
+    Pred = ICmpInst::getSwappedPredicate(Pred);
   auto *Preheader = L.getLoopPreheader();
   assert(Preheader && "Loop is not in simplify form?");
   IRBuilder<> Builder(Preheader->getTerminator());
-  Value *NewCmpOp = Builder.CreateSub(RHS, InvariantOp, "invariant.op",
-                                      /*HasNUW*/ false, /*HasNSW*/ true);
+  // Above we have asserted that RHS and InvariantOp cannot both be taken with
+  // negative sign, so we can always create a single sub or a single add
+  // without extra instructions for negation.
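+  // For example (these mirror test_01 and test_03 in hoist-add-sub.ll below):
+  //   "(x - iv) <s 4": after the operand swap NegateVariantOp is true, so we
+  //   swap the predicate and negate RHS, producing "iv >s (x - 4)";
+  //   "(iv - x) <s 4": neither RHS nor InvariantOp needs negation, so we emit
+  //   an add and produce "iv <s (x + 4)".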
+  Value *NewCmpOp = nullptr;
+  if (ShouldNegateInvariantOp)
+    NewCmpOp = Builder.CreateSub(RHS, InvariantOp, "invariant.op",
+                                 /*HasNUW*/ false, /*HasNSW*/ true);
+  else if (ShouldNegateRHS)
+    NewCmpOp = Builder.CreateSub(InvariantOp, RHS, "invariant.op",
+                                 /*HasNUW*/ false, /*HasNSW*/ true);
+  else
+    NewCmpOp = Builder.CreateAdd(InvariantOp, RHS, "invariant.op",
+                                 /*HasNUW*/ false, /*HasNSW*/ true);
   auto *ICmp = cast<ICmpInst>(&I);
   ICmp->setPredicate(Pred);
   ICmp->setOperand(0, VariantOp);
Index: llvm/test/Transforms/LICM/hoist-add-sub.ll
===================================================================
--- llvm/test/Transforms/LICM/hoist-add-sub.ll
+++ llvm/test/Transforms/LICM/hoist-add-sub.ll
@@ -1,18 +1,18 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
 ; RUN: opt -S -passes=licm -verify-memoryssa < %s | FileCheck %s
 
-; TODO: x - iv < 4 ==> iv > x - 4
+; x - iv < 4 ==> iv > x - 4
 define i32 @test_01(ptr %p, ptr %x_p, ptr %length_p) {
 ; CHECK-LABEL: define i32 @test_01
 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0:![0-9]+]]
 ; CHECK-NEXT:    [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = sub nsw i32 [[X]], 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT:    [[ARITH:%.*]] = sub nsw i32 [[X]], [[IV]]
-; CHECK-NEXT:    [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4
+; CHECK-NEXT:    [[X_CHECK:%.*]] = icmp sgt i32 [[IV]], [[INVARIANT_OP]]
 ; CHECK-NEXT:    br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
@@ -278,18 +278,18 @@
   ret i32 -2
 }
 
-; TODO: iv - x < 4 ==> iv < 4 + x
+; iv - x < 4 ==> iv < 4 + x
 define i32 @test_03(ptr %p, ptr %x_p, ptr %length_p) {
 ; CHECK-LABEL: define i32 @test_03
 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG1:![0-9]+]]
 ; CHECK-NEXT:    [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = add nsw i32 [[X]], 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT:    [[ARITH:%.*]] = sub nsw i32 [[IV]], [[X]]
-; CHECK-NEXT:    [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4
+; CHECK-NEXT:    [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]]
 ; CHECK-NEXT:    br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]