Index: lib/Transforms/Scalar/LoopStrengthReduce.cpp
===================================================================
--- lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1668,6 +1668,11 @@
   ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
   void OptimizeLoopTermCond();
 
+  void UpdatePostIncUsersToPostIncForm();
+  bool IsPostIncIVUser(Value *IvOp);
+  bool IsUsedAfterInc(Value *IvOp, Instruction *PostIncV,
+                      SmallPtrSetImpl<Instruction *> &Visited);
+
   void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
                         SmallVectorImpl<ChainUsers> &ChainUsersVec);
   void FinalizeChain(IVChain &Chain);
@@ -2063,6 +2068,82 @@
   return NewCond;
 }
 
+/// Return true if \p IvOp is found by transitively walking the users of
+/// \p PostIncV. Only users for which IU.isIVUserOrOperand() holds and that are
+/// not yet in \p Visited are followed; each one followed is added to
+/// \p Visited.
+bool LSRInstance::IsUsedAfterInc(Value *IvOp, Instruction *PostIncV,
+                                 SmallPtrSetImpl<Instruction *> &Visited) {
+  for (User *U : PostIncV->users()) {
+    auto *UserInst = cast<Instruction>(U);
+    if (!IU.isIVUserOrOperand(UserInst))
+      continue;
+    // insert() reports whether the instruction was new; skip repeats.
+    if (!Visited.insert(UserInst).second)
+      continue;
+    if (UserInst == IvOp || IsUsedAfterInc(IvOp, UserInst, Visited))
+      return true;
+  }
+  return false;
+}
+
+/// Return true if \p IvOp is derived from the value a header phi receives from
+/// the loop latch (the post-inc value) rather than being the phi itself. Only
+/// header phis for which IU.isIVUserOrOperand() holds are considered.
+bool LSRInstance::IsPostIncIVUser(Value *IvOp) {
+  // NOTE(review): the inline size (and element type) of this set was lost in
+  // this copy of the patch; confirm against the original.
+  SmallPtrSet<Instruction *, 16> Visited;
+  for (BasicBlock::iterator I = L->getHeader()->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+    // Seed Visited with the header phi so the walk below does not continue
+    // through it.
+    Visited.insert(PN);
+
+    if (!IU.isIVUserOrOperand(PN))
+      continue;
+
+    // Directly use PHINode, so pre-inc.
+    if (IvOp == PN)
+      return false;
+    // Visit phi's backedge to determine if the IV user uses a post-inc value.
+    // NOTE(review): this returns for the first phi whose latch value is an
+    // instruction; later header phis are never examined. Confirm intended.
+    if (auto *PostIncV = dyn_cast<Instruction>(
+            PN->getIncomingValueForBlock(L->getLoopLatch())))
+      return IsUsedAfterInc(IvOp, PostIncV, Visited);
+  }
+  return false;
+}
+
+/// Use the post-inc expression if an IV user is already a post-inc user.
+void LSRInstance::UpdatePostIncUsersToPostIncForm() {
+  for (IVStrideUse &U : IU) {
+    Instruction *UserInst = U.getUser();
+    // NOTE(review): the isa<> type was lost in this copy of the patch; PHINode
+    // is assumed. Confirm against the original.
+    if (U.getPostIncLoops().count(L) || isa<PHINode>(UserInst))
+      continue;
+    if (!IsPostIncIVUser(U.getOperandValToReplace()))
+      continue;
+    U.transformToPostInc(L);
+
+    // Update the insertion point for the loop induction variable increment.
+    // It must dominate all the post-inc users we just transformed.
+    BasicBlock *UserBB = UserInst->getParent();
+    BasicBlock *IncBB = IVIncInsertPos->getParent();
+    BasicBlock *BB = DT.findNearestCommonDominator(IncBB, UserBB);
+    if (BB == UserBB) {
+      // Within a single block only move the insertion point up, never down,
+      // so that it keeps dominating the users handled on earlier iterations.
+      if (IncBB != UserBB || DT.dominates(UserInst, IVIncInsertPos))
+        IVIncInsertPos = UserInst;
+    } else if (BB != IncBB) {
+      IVIncInsertPos = BB->getTerminator();
+    }
+  }
+}
+
 /// Change loop terminating condition to use the postinc iv when possible.
 void
 LSRInstance::OptimizeLoopTermCond() {
@@ -4837,6 +4918,8 @@
     return;
   }
 
+  UpdatePostIncUsersToPostIncForm();
+
   // Start collecting data and preparing for the solver.
   CollectChains();
   CollectInterestingTypesAndFactors();
Index: test/Transforms/LoopStrengthReduce/AArch64/lsr-postinc.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopStrengthReduce/AArch64/lsr-postinc.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s | FileCheck %s
+
+@gvarray = common global [12 x i32] zeroinitializer, align 4
+
+target triple = "arm64-unknown-unknown"
+
+; Check if the post-inc IV users (store and icmp) use the post-inc (%K) value
+; and the pre/post-inc values of IV are coalesced into one register value.
+
+define void @test(i32 %n) {
+entry:
+  %cmp7 = icmp sgt i32 %n, 1
+  br i1 %cmp7, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+  %n_sext = sext i32 %n to i64
+  br label %for.body
+
+for.body:
+
+; CHECK: add x[[INC:[0-9]+]], x[[INC]], #1
+; CHECK: str w[[INC]], [x{{[0-9]+}}, x[[INC]], lsl #2]
+; CHECK: cmp x[[INC]]
+; Every IV user below (store address, stored value, icmp) reads %K, not %K.in.
+  %K.in = phi i64 [ %n_sext, %for.body.preheader ], [ %K, %for.body ]
+  %K = add i64 %K.in, 1
+  %StoredAddr = getelementptr inbounds [12 x i32], [12 x i32]* @gvarray, i64 0, i64 %K
+  %StoredValue = trunc i64 %K to i32
+  store volatile i32 %StoredValue, i32* %StoredAddr
+  %cmp = icmp sgt i64 %K, 1
+  br i1 %cmp, label %for.body, label %for.end
+for.end:
+  ret void
+}
+