Index: llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -5649,6 +5649,52 @@
     DeadInsts.emplace_back(OperandIsInstr);
 }
 
+/// Return true if every user of \p I is in \p BB (trivially true when \p I
+/// has no users).
+static bool AllUseInBB(Instruction *I, BasicBlock *BB) {
+  for (User *U : I->users()) {
+    Instruction *UI = cast<Instruction>(U);
+    if (UI->getParent() != BB)
+      return false;
+  }
+  return true;
+}
+
+/// Try to hoist the IV increment into the loop header when every user of the
+/// operand being replaced is in the loop header. This helps the backend form
+/// post-indexed loads/stores when the latch block differs from the header.
+///
+/// Returns true only if \p LU is an address use, \p IVIncInsertPos sits in the
+/// loop latch (and the latch is not the header), the operand to replace is an
+/// instruction used only inside the header, and \p TTI reports post-increment
+/// indexed access as legal for the load or store in \p Fixup.
+static bool CanHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup,
+                          const LSRUse &LU, Instruction *IVIncInsertPos,
+                          Loop *L) {
+  if (LU.Kind != LSRUse::Address)
+    return false;
+
+  // Nothing to hoist if increments already go in the header; otherwise only
+  // handle the case where they would be placed in the loop latch.
+  BasicBlock *LHeader = L->getHeader();
+  BasicBlock *InsertBB = IVIncInsertPos->getParent();
+  if (InsertBB == LHeader || InsertBB != L->getLoopLatch())
+    return false;
+
+  // dyn_cast_or_null: bail out instead of asserting if no operand is recorded.
+  auto *Operand = dyn_cast_or_null<Instruction>(Fixup.OperandValToReplace);
+  if (!Operand || !AllUseInBB(Operand, LHeader))
+    return false;
+
+  // NOTE(review): a StoreInst has void type, so I->getType() below is not the
+  // stored value's type; confirm whether getLoadStoreType(I) was intended.
+  Instruction *I = Fixup.UserInst;
+  return (isa<LoadInst>(I) &&
+          TTI.isIndexedLoadLegal(TTI.MIM_PostInc, I->getType())) ||
+         (isa<StoreInst>(I) &&
+          TTI.isIndexedStoreLegal(TTI.MIM_PostInc, I->getType()));
+}
+
 /// Rewrite all the fixup locations with new values, following the chosen
 /// solution.
 void LSRInstance::ImplementSolution(
@@ -5657,8 +5703,6 @@
   // we can remove them after we are done working.
   SmallVector<WeakTrackingVH, 16> DeadInsts;
 
-  Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
-
   // Mark phi nodes that terminate chains so the expander tries to reuse them.
   for (const IVChain &Chain : IVChainVec) {
     if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
@@ -5666,11 +5710,17 @@
   }
 
   // Expand the new value definitions and update the users.
-  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
-    for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
-      Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);
+  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+    const LSRUse &LU = Uses[LUIdx];
+    for (const LSRFixup &Fixup : LU.Fixups) {
+      // Insert IV increments at the header's terminator when hoisting applies.
+      bool Hoist = CanHoistIVInc(TTI, Fixup, LU, IVIncInsertPos, L);
+      Rewriter.setIVIncInsertPos(
+          L, Hoist ? L->getHeader()->getTerminator() : IVIncInsertPos);
+      Rewrite(LU, Fixup, *Solution[LUIdx], DeadInsts);
       Changed = true;
     }
+  }
 
   for (const IVChain &Chain : IVChainVec) {
     GenerateIVChain(Chain, DeadInsts);
Index: llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
===================================================================
--- llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
+++ llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
@@ -13,11 +13,10 @@
 ; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:  .LBB0_2: // %for.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldr w9, [x1]
+; CHECK-NEXT:    ldr w9, [x1], #4
 ; CHECK-NEXT:    cbnz w9, .LBB0_5
 ; CHECK-NEXT:  // %bb.3: // %for.cond
 ; CHECK-NEXT:    // in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    add x1, x1, #4
 ; CHECK-NEXT:    subs x8, x8, #1
 ; CHECK-NEXT:    b.ne .LBB0_2
 ; CHECK-NEXT:  .LBB0_4:
Index: llvm/test/Transforms/LoopStrengthReduce/ivincs-hoist.ll
===================================================================
--- llvm/test/Transforms/LoopStrengthReduce/ivincs-hoist.ll
+++ llvm/test/Transforms/LoopStrengthReduce/ivincs-hoist.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -loop-reduce -S < %s | FileCheck %s
 
+target triple = "aarch64-unknown-linux-gnu"
 target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"
 
 define i32 @test(i32 %c, ptr %a, ptr %b) {
@@ -12,15 +13,15 @@
 ; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[C]] to i64
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond:
-; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[LSR_IV:%.*]], i64 4
 ; CHECK-NEXT:    [[LSR_IV_NEXT:%.*]] = add nsw i64 [[LSR_IV1:%.*]], -1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[RETURN_LOOPEXIT:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[LSR_IV1]] = phi i64 [ [[LSR_IV_NEXT]], [[FOR_COND:%.*]] ], [ [[WIDE_TRIP_COUNT]], [[FOR_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[LSR_IV]] = phi ptr [ [[UGLYGEP]], [[FOR_COND]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi ptr [ [[UGLYGEP:%.*]], [[FOR_COND]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[LSR_IV]], align 4
 ; CHECK-NEXT:    [[TOBOOL3_NOT:%.*]] = icmp eq i32 [[VAL]], 0
+; CHECK-NEXT:    [[UGLYGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 4
 ; CHECK-NEXT:    br i1 [[TOBOOL3_NOT]], label [[FOR_COND]], label [[RETURN_LOOPEXIT]]
 ; CHECK:       return.loopexit:
 ; CHECK-NEXT:    [[RETVAL_1_PH:%.*]] = phi i32 [ 1, [[FOR_BODY]] ], [ 0, [[FOR_COND]] ]