Index: llvm/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LICM.cpp
+++ llvm/lib/Transforms/Scalar/LICM.cpp
@@ -107,6 +107,8 @@
           "Number of min/max expressions hoisted out of the loop");
 STATISTIC(NumGEPsHoisted,
           "Number of geps reassociated and hoisted out of the loop");
+STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated "
+                            "and hoisted out of the loop");
 
 /// Memory promotion is enabled by default.
 static cl::opt<bool>
@@ -173,7 +175,7 @@
 static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA,
                                       MemoryUse &MU);
 /// Aggregates various functions for hoisting computations out of loop.
-static bool hoistArithmetics(Instruction &I, Loop &L,
+static bool hoistArithmetics(Instruction &I, Loop &L, ScalarEvolution &SE,
                              ICFLoopSafetyInfo &SafetyInfo,
                              MemorySSAUpdater &MSSAU, AssumptionCache *AC,
                              DominatorTree *DT);
@@ -987,7 +989,7 @@
 
       // Try to reassociate instructions so that part of computations can be
       // done out of loop.
-      if (hoistArithmetics(I, *CurLoop, *SafetyInfo, MSSAU, AC, DT)) {
+      if (hoistArithmetics(I, *CurLoop, *SE, *SafetyInfo, MSSAU, AC, DT)) {
         Changed = true;
         continue;
       }
@@ -2547,10 +2549,103 @@
   return true;
 }
 
-static bool hoistArithmetics(Instruction &I, Loop &L,
+static bool hoistAddSub(Instruction &I, Loop &L, ScalarEvolution &SE,
+                        ICFLoopSafetyInfo &SafetyInfo,
+                        MemorySSAUpdater &MSSAU) {
+  using namespace PatternMatch;
+  ICmpInst::Predicate Pred;
+  Value *LHS, *RHS;
+  if (!match(&I, m_ICmp(Pred, m_Value(LHS), m_Value(RHS))))
+    return false;
+
+  if (L.isLoopInvariant(LHS)) {
+    std::swap(LHS, RHS);
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+
+  if (!ICmpInst::isSigned(Pred) || L.isLoopInvariant(LHS) ||
+      !L.isLoopInvariant(RHS) || !LHS->hasOneUse())
+    return false;
+  Value *VariantOp, *InvariantOp;
+  bool NegateVariantOp = false, NegateInvariantOp = false;
+  if (match(LHS, m_Sub(m_Value(VariantOp), m_Value(InvariantOp))))
+    NegateInvariantOp = true;
+  else if (!match(LHS, m_Add(m_Value(VariantOp), m_Value(InvariantOp))))
+    return false;
+
+  if (L.isLoopInvariant(VariantOp)) {
+    std::swap(VariantOp, InvariantOp);
+    std::swap(NegateVariantOp, NegateInvariantOp);
+  }
+
+  if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
+    return false;
+
+  if (!cast<Instruction>(LHS)->hasNoSignedWrap())
+    return false;
+
+  // Now we have the following pattern:
+  //   pow(-1, NegateVariantOp) * VariantOp +
+  //       pow(-1, NegateInvariantOp) * InvariantOp <Pred>
+  //       RHS.
+  // The transforms we want to do are the following:
+  // Step 1: move all invariants to right hand side of inequality.
+  //   pow(-1, NegateVariantOp) * VariantOp <Pred>
+  //       RHS -
+  //       pow(-1, NegateInvariantOp) * InvariantOp,
+  // or the same can be rewritten as
+  //   pow(-1, NegateVariantOp) * VariantOp <Pred>
+  //       RHS + pow(-1, NegateInvariantOp + 1) * InvariantOp.
+  // Step 2: if variant op is inverted, multiply both sides by -1 (and swap
+  // predicate if needed).
+  //   VariantOp <Pred'>
+  //       pow(-1, NegateVariantOp) * RHS +
+  //       pow(-1, NegateVariantOp + NegateInvariantOp + 1) * InvariantOp.
+
+  bool ShouldSwapPredicate = NegateVariantOp;
+  bool ShouldNegateOp1 = NegateVariantOp;
+  bool ShouldNegateOp2 = NegateVariantOp ^ NegateInvariantOp ^ true;
+  assert(!(ShouldNegateOp1 && ShouldNegateOp2) &&
+         "Cannot end up negating both operands");
+
+  Value *Op1 = RHS;
+  Value *Op2 = InvariantOp;
+  if (ShouldNegateOp1) {
+    std::swap(Op1, Op2);
+    std::swap(ShouldNegateOp1, ShouldNegateOp2);
+  }
+
+  // Make sure no overflow happens while computing the invariant sum.
+  const SCEV *Op1S = SE.getSCEV(Op1);
+  const SCEV *Op2S = SE.getSCEV(Op2);
+  if (ShouldNegateOp2)
+    Op2S = SE.getNegativeSCEV(Op2S);
+  if (!SE.willNotOverflow(Instruction::BinaryOps::Add, /*Signed*/ true, Op1S,
+                          Op2S, /*CtxI*/ &I))
+    return false;
+
+  if (ShouldSwapPredicate)
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  auto *Preheader = L.getLoopPreheader();
+  assert(Preheader && "Loop is not in simplify form?");
+  IRBuilder<> Builder(Preheader->getTerminator());
+  Value *NewCmpOp = ShouldNegateOp2
+                        ? Builder.CreateSub(Op1, Op2, "invariant.op",
+                                            /*HasNUW*/ false, /*HasNSW*/ true)
+                        : Builder.CreateAdd(Op1, Op2, "invariant.op",
+                                            /*HasNUW*/ false, /*HasNSW*/ true);
+  auto *ICmp = cast<ICmpInst>(&I);
+  ICmp->setPredicate(Pred);
+  ICmp->setOperand(0, VariantOp);
+  ICmp->setOperand(1, NewCmpOp);
+  eraseInstruction(cast<Instruction>(*LHS), SafetyInfo, MSSAU);
+  return true;
+}
+
+static bool hoistArithmetics(Instruction &I, Loop &L, ScalarEvolution &SE,
                              ICFLoopSafetyInfo &SafetyInfo,
-                             MemorySSAUpdater &MSSAU,
-                             AssumptionCache *AC, DominatorTree *DT) {
+                             MemorySSAUpdater &MSSAU, AssumptionCache *AC,
+                             DominatorTree *DT) {
   // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them
   // into (x < min(INV1, INV2)), and hoisting the invariant part of this
   // expression out of the loop.
@@ -2567,6 +2662,13 @@
     return true;
   }
 
+  // Try to hoist add/sub expressions by reassociation.
+  if (hoistAddSub(I, L, SE, SafetyInfo, MSSAU)) {
+    ++NumHoisted;
+    ++NumAddSubHoisted;
+    return true;
+  }
+
   return false;
 }
Index: llvm/test/Transforms/LICM/hoist-add-sub.ll
===================================================================
--- llvm/test/Transforms/LICM/hoist-add-sub.ll
+++ llvm/test/Transforms/LICM/hoist-add-sub.ll
@@ -1,18 +1,18 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
 ; RUN: opt -S -passes=licm -verify-memoryssa < %s | FileCheck %s
 
-; TODO: x - iv < 4 ==> iv > x - 4
+; x - iv < 4 ==> iv > x - 4
 define i32 @test_01(ptr %p, ptr %x_p, ptr %length_p) {
 ; CHECK-LABEL: define i32 @test_01
 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0:![0-9]+]]
 ; CHECK-NEXT:    [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = sub nsw i32 [[X]], 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT:    [[ARITH:%.*]] = sub nsw i32 [[X]], [[IV]]
-; CHECK-NEXT:    [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4
+; CHECK-NEXT:    [[X_CHECK:%.*]] = icmp sgt i32 [[IV]], [[INVARIANT_OP]]
 ; CHECK-NEXT:    br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
@@ -165,18 +165,18 @@
 }
 
-; TODO: x + iv < 4 ==> iv < 4 - x
+; x + iv < 4 ==> iv < 4 - x
 define i32 @test_02(ptr %p, ptr %x_p, ptr %length_p) {
 ; CHECK-LABEL: define i32 @test_02
 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0]]
 ; CHECK-NEXT:    [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = sub nsw i32 4, [[X]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT:    [[ARITH:%.*]] = add nsw i32 [[X]], [[IV]]
-; CHECK-NEXT:    [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4
+; CHECK-NEXT:    [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]]
 ; CHECK-NEXT:    br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
@@ -278,18 +278,18 @@
   ret i32 -2
 }
 
-; TODO: iv - x < 4 ==> iv < 4 + x
+; iv - x < 4 ==> iv < 4 + x
 define i32 @test_03(ptr %p, ptr %x_p, ptr %length_p) {
 ; CHECK-LABEL: define i32 @test_03
 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG1:![0-9]+]]
 ; CHECK-NEXT:    [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = add nsw i32 4, [[X]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT:    [[ARITH:%.*]] = sub nsw i32 [[IV]], [[X]]
-; CHECK-NEXT:    [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4
+; CHECK-NEXT:    [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]]
 ; CHECK-NEXT:    br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
@@ -391,18 +391,18 @@
   ret i32 -2
 }
 
-; TODO: iv + x < 4 ==> iv < 4 - x
+; iv + x < 4 ==> iv < 4 - x
 define i32 @test_04(ptr %p, ptr %x_p, ptr %length_p) {
 ; CHECK-LABEL: define i32 @test_04
 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0]]
 ; CHECK-NEXT:    [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = sub nsw i32 4, [[X]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT:    [[ARITH:%.*]] = add nsw i32 [[IV]], [[X]]
-; CHECK-NEXT:    [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4
+; CHECK-NEXT:    [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]]
 ; CHECK-NEXT:    br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
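For reference, a minimal before/after sketch of the reassociation exercised by test_01 above, distilled from its input IR and CHECK lines (value and block names mirror the FileCheck captures; the rest of the loop body and the bounds-check control flow of the full test are omitted):

; Before: the sub is recomputed on every iteration.
loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
  %arith = sub nsw i32 %x, %iv
  %x_check = icmp slt i32 %arith, 4            ; x - iv < 4

; After LICM: the comparison is reassociated to iv > x - 4, and the
; loop-invariant sub is hoisted into the preheader.
entry:
  %invariant.op = sub nsw i32 %x, 4
  br label %loop
loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
  %x_check = icmp sgt i32 %iv, %invariant.op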