Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -638,13 +638,14 @@ DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const; - /// \return True is LSR should make efforts to create/preserve post-inc - /// addressing mode expressions. - bool shouldFavorPostInc() const; + enum AddressingModeKind { + AMK_PreIndexed, + AMK_PostIndexed, + AMK_None + }; - /// Return true if LSR should make efforts to generate indexed addressing - /// modes that operate across loop iterations. - bool shouldFavorBackedgeIndex(const Loop *L) const; + /// Return the preferred addressing mode LSR should make efforts to generate. + AddressingModeKind getAddressingMode(const Loop *L) const; /// Return true if the target supports masked store. bool isLegalMaskedStore(Type *DataType, Align Alignment) const; @@ -1459,8 +1460,7 @@ virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) = 0; - virtual bool shouldFavorPostInc() const = 0; - virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0; + virtual AddressingModeKind getAddressingMode(const Loop *L) const = 0; virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0; virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0; virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0; @@ -1803,9 +1803,8 @@ TargetLibraryInfo *LibInfo) override { return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); } - bool shouldFavorPostInc() const override { return Impl.shouldFavorPostInc(); } - bool shouldFavorBackedgeIndex(const Loop *L) const override { - return Impl.shouldFavorBackedgeIndex(L); + AddressingModeKind getAddressingMode(const Loop *L) const override { + return Impl.getAddressingMode(L); } bool 
isLegalMaskedStore(Type *DataType, Align Alignment) override { return Impl.isLegalMaskedStore(DataType, Alignment); Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -209,9 +209,9 @@ return false; } - bool shouldFavorPostInc() const { return false; } - - bool shouldFavorBackedgeIndex(const Loop *L) const { return false; } + TTI::AddressingModeKind getAddressingMode(const Loop *L) const { + return TTI::AMK_None; + } bool isLegalMaskedStore(Type *DataType, Align Alignment) const { return false; Index: llvm/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetTransformInfo.cpp +++ llvm/lib/Analysis/TargetTransformInfo.cpp @@ -410,12 +410,9 @@ return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); } -bool TargetTransformInfo::shouldFavorPostInc() const { - return TTIImpl->shouldFavorPostInc(); -} - -bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const { - return TTIImpl->shouldFavorBackedgeIndex(L); +TTI::AddressingModeKind +TargetTransformInfo::getAddressingMode(const Loop *L) const { + return TTIImpl->getAddressingMode(L); } bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.h =================================================================== --- llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -103,8 +103,7 @@ bool enableInterleavedAccessVectorization() { return true; } - bool shouldFavorBackedgeIndex(const Loop *L) const; - bool shouldFavorPostInc() const; + TTI::AddressingModeKind getAddressingMode(const Loop *L) const; /// Floating-point computation using ARMv8 AArch32 Advanced /// SIMD instructions remains unchanged from ARMv7. 
Only AArch64 SIMD Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -100,18 +100,18 @@ return MatchExact && MatchSubset; } -bool ARMTTIImpl::shouldFavorBackedgeIndex(const Loop *L) const { +TTI::AddressingModeKind ARMTTIImpl::getAddressingMode(const Loop *L) const { if (L->getHeader()->getParent()->hasOptSize()) - return false; - if (ST->hasMVEIntegerOps()) - return false; - return ST->isMClass() && ST->isThumb2() && L->getNumBlocks() == 1; -} + return TTI::AMK_PostIndexed; + + if (!ST->hasMVEIntegerOps() && ST->isMClass() && ST->isThumb2() && + L->getNumBlocks() == 1) + return TTI::AMK_PreIndexed; -bool ARMTTIImpl::shouldFavorPostInc() const { if (ST->hasMVEIntegerOps()) - return true; - return false; + return TTI::AMK_PostIndexed; + + return TTI::AMK_None; } Optional Index: llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h =================================================================== --- llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -67,7 +67,7 @@ TTI::PeelingPreferences &PP); /// Bias LSR towards creating post-increment opportunities. - bool shouldFavorPostInc() const; + TTI::AddressingModeKind getAddressingMode(const Loop *L) const; // L1 cache prefetch. 
unsigned getPrefetchDistance() const override; Index: llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -80,8 +80,8 @@ } } -bool HexagonTTIImpl::shouldFavorPostInc() const { - return true; +TTI::AddressingModeKind HexagonTTIImpl::getAddressingMode(const Loop *L) const { + return TTI::AMK_PostIndexed; } /// --- Vector TTI begin --- Index: llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1227,13 +1227,15 @@ /// Tally up interesting quantities from the given register. void Cost::RateRegister(const Formula &F, const SCEV *Reg, SmallPtrSetImpl &Regs) { + TTI::AddressingModeKind AMK = TTI->getAddressingMode(L); + if (const SCEVAddRecExpr *AR = dyn_cast(Reg)) { // If this is an addrec for another loop, it should be an invariant // with respect to L since L is the innermost loop (at least // for now LSR only handles innermost loops). if (AR->getLoop() != L) { // If the AddRec exists, consider it's register free and leave it alone. - if (isExistingPhi(AR, *SE) && !TTI->shouldFavorPostInc()) + if (isExistingPhi(AR, *SE) && AMK != TTI::AMK_PostIndexed) return; // It is bad to allow LSR for current loop to add induction variables @@ -1254,13 +1256,11 @@ // If the step size matches the base offset, we could use pre-indexed // addressing. 
- if (TTI->shouldFavorBackedgeIndex(L)) { + if (AMK == TTI::AMK_PreIndexed) { if (auto *Step = dyn_cast(AR->getStepRecurrence(*SE))) if (Step->getAPInt() == F.BaseOffset) LoopCost = 0; - } - - if (TTI->shouldFavorPostInc()) { + } else if (AMK == TTI::AMK_PostIndexed) { const SCEV *LoopStep = AR->getStepRecurrence(*SE); if (isa(LoopStep)) { const SCEV *LoopStart = AR->getStart(); @@ -3575,7 +3575,8 @@ // may generate a post-increment operator. The reason is that the // reassociations cause extra base+register formula to be created, // and possibly chosen, but the post-increment is more efficient. - if (TTI.shouldFavorPostInc() && mayUsePostIncMode(TTI, LU, BaseReg, L, SE)) + TTI::AddressingModeKind AMK = TTI.getAddressingMode(L); + if (AMK == TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, BaseReg, L, SE)) return; SmallVector AddOps; const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE); @@ -4239,7 +4240,7 @@ NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm; if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, NewF)) { - if (TTI.shouldFavorPostInc() && + if (TTI.getAddressingMode(this->L) == TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE)) continue; if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm)) @@ -4679,7 +4680,7 @@ /// If we are over the complexity limit, filter out any post-inc prefering /// variables to only post-inc values. void LSRInstance::NarrowSearchSpaceByFilterPostInc() { - if (!TTI.shouldFavorPostInc()) + if (TTI.getAddressingMode(L) != TTI::AMK_PostIndexed) return; if (EstimateSearchSpaceComplexity() < ComplexityLimit) return; @@ -4978,7 +4979,8 @@ // This can sometimes (notably when trying to favour postinc) lead to // sub-optimial decisions. There it is best left to the cost modelling to // get correct. 
- if (!TTI.shouldFavorPostInc() || LU.Kind != LSRUse::Address) { + if (TTI.getAddressingMode(L) != TTI::AMK_PostIndexed || + LU.Kind != LSRUse::Address) { int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size()); for (const SCEV *Reg : ReqRegs) { if ((F.ScaledReg && F.ScaledReg == Reg) || @@ -5560,7 +5562,7 @@ TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU) : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L), MSSAU(MSSAU), FavorBackedgeIndex(EnableBackedgeIndexing && - TTI.shouldFavorBackedgeIndex(L)) { + TTI.getAddressingMode(L) == TTI::AMK_PreIndexed) { // If LoopSimplify form is not available, stay out of trouble. if (!L->isLoopSimplifyForm()) return; Index: llvm/test/CodeGen/ARM/loop-align-cortex-m.ll =================================================================== --- llvm/test/CodeGen/ARM/loop-align-cortex-m.ll +++ llvm/test/CodeGen/ARM/loop-align-cortex-m.ll @@ -1,11 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv7m-none-eabi %s -mcpu=cortex-m3 -o - | FileCheck %s ; RUN: llc -mtriple=thumbv7m-none-eabi %s -mcpu=cortex-m4 -o - | FileCheck %s ; RUN: llc -mtriple=thumbv8m-none-eabi %s -mcpu=cortex-m33 -o - | FileCheck %s define void @test_loop_alignment(i32* %in, i32* %out) optsize { ; CHECK-LABEL: test_loop_alignment: -; CHECK: mov{{.*}}, #0 -; CHECK: .p2align 2 +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: mov.w r2, #1024 +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: .LBB0_1: @ %loop +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldr r3, [r0], #4 +; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: add.w r3, r3, r3, lsl #2 +; CHECK-NEXT: str r3, [r1], #4 +; CHECK-NEXT: bne .LBB0_1 +; CHECK-NEXT: @ %bb.2: @ %end +; CHECK-NEXT: bx lr entry: br label %loop @@ -27,8 +38,17 @@ define void @test_loop_alignment_minsize(i32* %in, i32* %out) minsize { ; CHECK-LABEL: test_loop_alignment_minsize: -; CHECK: movs {{r[0-9]+}}, #0 -; CHECK-NOT: .p2align +; CHECK: @ %bb.0: @ %entry 
+; CHECK-NEXT: mov.w r2, #1024 +; CHECK-NEXT: .LBB1_1: @ %loop +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldm r0!, {r3} +; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: add.w r3, r3, r3, lsl #2 +; CHECK-NEXT: stm r1!, {r3} +; CHECK-NEXT: bne .LBB1_1 +; CHECK-NEXT: @ %bb.2: @ %end +; CHECK-NEXT: bx lr entry: br label %loop Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll @@ -96,16 +96,15 @@ define void @cbz_exit_minsize(i32* %in, i32* %res) #0 { ; CHECK-LABEL: cbz_exit_minsize: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: mov.w r2, #-1 ; CHECK-NEXT: .LBB3_1: @ %loop ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldr.w r3, [r0, r2, lsl #2] +; CHECK-NEXT: ldm r0!, {r3} ; CHECK-NEXT: adds r2, #1 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: bne .LBB3_1 ; CHECK-NEXT: @ %bb.2: @ %exit -; CHECK-NEXT: subs r0, r2, #1 -; CHECK-NEXT: str r0, [r1] +; CHECK-NEXT: str r2, [r1] ; CHECK-NEXT: bx lr entry: br label %loop @@ -126,16 +125,15 @@ define void @cbnz_exit_minsize(i32* %in, i32* %res) #0 { ; CHECK-LABEL: cbnz_exit_minsize: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: mov.w r2, #-1 ; CHECK-NEXT: .LBB4_1: @ %loop ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldr.w r3, [r0, r2, lsl #2] +; CHECK-NEXT: ldm r0!, {r3} ; CHECK-NEXT: adds r2, #1 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: beq .LBB4_1 ; CHECK-NEXT: @ %bb.2: @ %exit -; CHECK-NEXT: subs r0, r2, #1 -; CHECK-NEXT: str r0, [r1] +; CHECK-NEXT: str r2, [r1] ; CHECK-NEXT: bx lr entry: br label %loop