diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -2572,6 +2572,68 @@ return true; } +/// Try to reassociate and hoist the following two patterns: +/// LV - C1 < C2 --> LV < C1 + C2, +/// C1 - LV < C2 --> LV > C1 - C2. +static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS, + Value *InvariantRHS, ICmpInst &ICmp, Loop &L, + ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, + AssumptionCache *AC, DominatorTree *DT) { + assert(ICmpInst::isSigned(Pred) && "Not supported yet!"); + assert(!L.isLoopInvariant(VariantLHS) && "Precondition."); + assert(L.isLoopInvariant(InvariantRHS) && "Precondition."); + + // Try to represent VariantLHS as sum of invariant and variant operands. + using namespace PatternMatch; + Value *VariantOp, *InvariantOp; + if (!match(VariantLHS, m_NSWSub(m_Value(VariantOp), m_Value(InvariantOp)))) + return false; + + bool VariantSubtracted = false; + // LHS itself is a loop-variant, try to represent it in the form: + // "VariantOp + InvariantOp". If it is possible, then we can reassociate. If + // the variant operand goes with minus, we use a slightly different scheme. + if (L.isLoopInvariant(VariantOp)) { + std::swap(VariantOp, InvariantOp); + VariantSubtracted = true; + Pred = ICmpInst::getSwappedPredicate(Pred); + } + if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp)) + return false; + + // In order to turn "LV - C1 < C2" into "LV < C2 + C1", we need to be able to + // freely move values from left side of inequality to right side (just as in + // normal linear arithmetics). Overflows make things much more complicated, so + // we want to avoid this. Likewise, for "C1 - LV < C2" we need to prove that + // "C1 - C2" does not overflow. + auto &DL = L.getHeader()->getModule()->getDataLayout(); + if (VariantSubtracted) { + // C1 - LV < C2 --> LV > C1 - C2 + if (computeOverflowForSignedSub(InvariantOp, InvariantRHS, DL, AC, &ICmp, + DT) != llvm::OverflowResult::NeverOverflows) + return false; + } else { + // LV - C1 < C2 --> LV < C1 + C2 + if (computeOverflowForSignedAdd(InvariantOp, InvariantRHS, DL, AC, &ICmp, + DT) != llvm::OverflowResult::NeverOverflows) + return false; + } + auto *Preheader = L.getLoopPreheader(); + assert(Preheader && "Loop is not in simplify form?"); + IRBuilder<> Builder(Preheader->getTerminator()); + Value *NewCmpOp = + VariantSubtracted + ? Builder.CreateSub(InvariantOp, InvariantRHS, "invariant.op", + /*HasNUW*/ false, /*HasNSW*/ true) + : Builder.CreateAdd(InvariantOp, InvariantRHS, "invariant.op", + /*HasNUW*/ false, /*HasNSW*/ true); + ICmp.setPredicate(Pred); + ICmp.setOperand(0, VariantOp); + ICmp.setOperand(1, NewCmpOp); + eraseInstruction(cast(*VariantLHS), SafetyInfo, MSSAU); + return true; +} + /// Reassociate and hoist add/sub expressions. static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, AssumptionCache *AC, @@ -2601,7 +2663,8 @@ if (hoistAdd(Pred, LHS, RHS, cast(I), L, SafetyInfo, MSSAU, AC, DT)) return true; - // TODO: Support Sub. + if (hoistSub(Pred, LHS, RHS, cast(I), L, SafetyInfo, MSSAU, AC, DT)) + return true; return false; } diff --git a/llvm/test/Transforms/LICM/hoist-add-sub.ll b/llvm/test/Transforms/LICM/hoist-add-sub.ll --- a/llvm/test/Transforms/LICM/hoist-add-sub.ll +++ b/llvm/test/Transforms/LICM/hoist-add-sub.ll @@ -1,18 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -passes=licm -verify-memoryssa < %s | FileCheck %s -; TODO: x - iv < 4 ==> iv > x - 4 +; x - iv < 4 ==> iv > x - 4 define i32 @test_01(ptr %p, ptr %x_p, ptr %length_p) { ; CHECK-LABEL: define i32 @test_01 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0:![0-9]+]] ; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 [[X]], 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[ARITH:%.*]] = sub nsw i32 [[X]], [[IV]] -; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4 +; CHECK-NEXT: [[X_CHECK:%.*]] = icmp sgt i32 [[IV]], [[INVARIANT_OP]] ; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] ; CHECK: backedge: ; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] @@ -278,18 +278,18 @@ ret i32 -2 } -; TODO: iv - x < 4 ==> iv < 4 + x +; iv - x < 4 ==> iv < 4 + x define i32 @test_03(ptr %p, ptr %x_p, ptr %length_p) { ; CHECK-LABEL: define i32 @test_03 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG1:![0-9]+]] ; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add nsw i32 [[X]], 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[ARITH:%.*]] = sub nsw i32 [[IV]], [[X]] -; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4 +; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]] ; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] ; CHECK: backedge: ; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]