Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
@@ -476,6 +476,10 @@
   /// calculation for the instructions in a loop.
   bool canMacroFuseCmp() const;
 
+  /// \return True if LSR should make efforts to create/preserve post-inc
+  /// addressing mode expressions.
+  bool shouldFavorPostInc() const;
+
   /// \brief Return true if the target supports masked load/store
   /// AVX2 and AVX-512 targets allow masks for consecutive load and store
   bool isLegalMaskedStore(Type *DataType) const;
@@ -886,6 +890,21 @@
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const;
 
+  /// \brief The type of load/store indexing.
+  enum MemIndexedMode {
+    MIM_Unindexed, ///< No indexing.
+    MIM_PreInc,    ///< Pre-incrementing.
+    MIM_PreDec,    ///< Pre-decrementing.
+    MIM_PostInc,   ///< Post-incrementing.
+    MIM_PostDec    ///< Post-decrementing.
+  };
+
+  /// \returns True if the specified indexed load for the given type is legal.
+  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
+
+  /// \returns True if the specified indexed store for the given type is legal.
+  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
+
   /// \returns The bitwidth of the largest vector type that should be used to
   /// load/store in the given address space.
   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
@@ -994,6 +1013,7 @@
   virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                              TargetTransformInfo::LSRCost &C2) = 0;
   virtual bool canMacroFuseCmp() = 0;
+  virtual bool shouldFavorPostInc() const = 0;
   virtual bool isLegalMaskedStore(Type *DataType) = 0;
   virtual bool isLegalMaskedLoad(Type *DataType) = 0;
   virtual bool isLegalMaskedScatter(Type *DataType) = 0;
@@ -1109,6 +1129,8 @@
       unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
   virtual bool areInlineCompatible(const Function *Caller,
                                    const Function *Callee) const = 0;
+  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
+  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
   virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
   virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
   virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
@@ -1216,6 +1238,9 @@
   bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
+  bool shouldFavorPostInc() const override {
+    return Impl.shouldFavorPostInc();
+  }
   bool isLegalMaskedStore(Type *DataType) override {
     return Impl.isLegalMaskedStore(DataType);
   }
@@ -1470,6 +1495,12 @@
                            const Function *Callee) const override {
     return Impl.areInlineCompatible(Caller, Callee);
   }
+  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
+    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
+  }
+  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
+    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
+  }
   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
     return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
   }
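For readers unfamiliar with the indexing terminology, the new MemIndexedMode
enumerators describe hardware addressing modes in which a memory access also
updates its base address. The following self-contained sketch is not part of
the patch; it is purely illustrative, spelling out each mode as plain pointer
arithmetic:

    #include <cstddef>
    #include <cstdint>

    // Illustration of the MemIndexedMode semantics on a pointer 'p' with
    // stride 'd'. Each comment names the corresponding enumerator.
    int32_t demo(const int32_t *p, ptrdiff_t d) {
      int32_t v;
      v = *p;          // MIM_Unindexed: address used as-is, no update.
      v = *(p += d);   // MIM_PreInc:  address advanced, then accessed.
      v = *(p -= d);   // MIM_PreDec:  address decremented, then accessed.
      v = *p; p += d;  // MIM_PostInc: accessed, then advanced.
      v = *p; p -= d;  // MIM_PostDec: accessed, then decremented.
      return v;
    }
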
Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -248,6 +248,8 @@
   bool canMacroFuseCmp() { return false; }
 
+  bool shouldFavorPostInc() const { return false; }
+
   bool isLegalMaskedStore(Type *DataType) { return false; }
 
   bool isLegalMaskedLoad(Type *DataType) { return false; }
@@ -511,6 +513,16 @@
                                      Callee->getFnAttribute("target-features"));
   }
 
+  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
+                          const DataLayout &DL) const {
+    return false;
+  }
+
+  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
+                           const DataLayout &DL) const {
+    return false;
+  }
+
   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
 
   bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
Index: llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
+++ llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
@@ -111,6 +111,22 @@
     return static_cast<const T *>(this)->getTLI();
   }
 
+  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
+    switch (M) {
+    case TTI::MIM_Unindexed:
+      return ISD::UNINDEXED;
+    case TTI::MIM_PreInc:
+      return ISD::PRE_INC;
+    case TTI::MIM_PreDec:
+      return ISD::PRE_DEC;
+    case TTI::MIM_PostInc:
+      return ISD::POST_INC;
+    case TTI::MIM_PostDec:
+      return ISD::POST_DEC;
+    }
+    llvm_unreachable("Unexpected MemIndexedMode");
+  }
+
 protected:
   explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
       : BaseT(DL) {}
@@ -157,6 +173,18 @@
     return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
   }
 
+  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
+                          const DataLayout &DL) const {
+    EVT VT = getTLI()->getValueType(DL, Ty);
+    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
+  }
+
+  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
+                           const DataLayout &DL) const {
+    EVT VT = getTLI()->getValueType(DL, Ty);
+    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
+  }
+
   bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
     return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
   }
Index: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
@@ -159,6 +159,10 @@
   return TTIImpl->canMacroFuseCmp();
 }
 
+bool TargetTransformInfo::shouldFavorPostInc() const {
+  return TTIImpl->shouldFavorPostInc();
+}
+
 bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
   return TTIImpl->isLegalMaskedStore(DataType);
 }
@@ -555,6 +559,16 @@
   return TTIImpl->areInlineCompatible(Caller, Callee);
 }
 
+bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
+                                             Type *Ty) const {
+  return TTIImpl->isIndexedLoadLegal(Mode, Ty);
+}
+
+bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode,
+                                              Type *Ty) const {
+  return TTIImpl->isIndexedStoreLegal(Mode, Ty);
+}
+
 unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
   return TTIImpl->getLoadStoreVecRegBitWidth(AS);
 }
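With the plumbing above in place, a mid-end pass only ever sees the
two-argument form of the query: the TTI wrapper supplies the DataLayout,
TargetTransformInfoImplBase conservatively answers false, and BasicTTIImplBase
translates the mode via getISDIndexedMode() before asking TargetLowering. A
minimal sketch of a client-side query, assuming only the interfaces added by
this patch (the helper itself is hypothetical), might read:

    #include "llvm/Analysis/TargetTransformInfo.h"
    using namespace llvm;

    // Illustrative helper (not part of the patch): true if 'Ty' can be
    // accessed with a post-indexed load or store on the current target
    // and LSR has been asked to favor such forms.
    static bool canUsePostIncAccess(const TargetTransformInfo &TTI, Type *Ty) {
      return TTI.shouldFavorPostInc() &&
             (TTI.isIndexedLoadLegal(TargetTransformInfo::MIM_PostInc, Ty) ||
              TTI.isIndexedStoreLegal(TargetTransformInfo::MIM_PostInc, Ty));
    }
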
Index: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -185,6 +185,8 @@
                              unsigned AS = UnknownAddressSpace) {
     return MemAccessTy(Type::getVoidTy(Ctx), AS);
   }
+
+  Type *getType() { return MemTy; }
 };
 
 /// This class holds data which is used to order reuse candidates.
@@ -1040,12 +1042,14 @@
   void RateRegister(const SCEV *Reg,
                     SmallPtrSetImpl<const SCEV *> &Regs,
                     const Loop *L,
-                    ScalarEvolution &SE, DominatorTree &DT);
+                    ScalarEvolution &SE, DominatorTree &DT,
+                    const TargetTransformInfo &TTI);
   void RatePrimaryRegister(const SCEV *Reg,
                            SmallPtrSetImpl<const SCEV *> &Regs,
                            const Loop *L,
                            ScalarEvolution &SE, DominatorTree &DT,
-                           SmallPtrSetImpl<const SCEV *> *LoserRegs);
+                           SmallPtrSetImpl<const SCEV *> *LoserRegs,
+                           const TargetTransformInfo &TTI);
 };
 
 /// An operand value in an instruction which is to be replaced with some
@@ -1194,7 +1198,8 @@
 void Cost::RateRegister(const SCEV *Reg,
                         SmallPtrSetImpl<const SCEV *> &Regs,
                         const Loop *L,
-                        ScalarEvolution &SE, DominatorTree &DT) {
+                        ScalarEvolution &SE, DominatorTree &DT,
+                        const TargetTransformInfo &TTI) {
   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
     // If this is an addrec for another loop, it should be an invariant
     // with respect to L since L is the innermost loop (at least
@@ -1215,13 +1220,28 @@
       ++C.NumRegs;
       return;
     }
-    C.AddRecCost += 1; /// TODO: This should be a function of the stride.
+
+    unsigned LoopCost = 1;
+    if (TTI.shouldFavorPostInc()) {
+      const SCEV *LoopStep = AR->getStepRecurrence(SE);
+      if (isa<SCEVConstant>(LoopStep)) {
+        // Check if a post-indexed load/store can be used.
+        if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
+            TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
+          const SCEV *LoopStart = AR->getStart();
+          if (!isa<SCEVConstant>(LoopStart) &&
+              SE.isLoopInvariant(LoopStart, L))
+            LoopCost = 0;
+        }
+      }
+    }
+    C.AddRecCost += LoopCost;
 
     // Add the step value register, if it needs one.
     // TODO: The non-affine case isn't precisely modeled here.
     if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
       if (!Regs.count(AR->getOperand(1))) {
-        RateRegister(AR->getOperand(1), Regs, L, SE, DT);
+        RateRegister(AR->getOperand(1), Regs, L, SE, DT, TTI);
         if (isLoser())
           return;
       }
@@ -1249,13 +1269,14 @@
                                SmallPtrSetImpl<const SCEV *> &Regs,
                                const Loop *L,
                                ScalarEvolution &SE, DominatorTree &DT,
-                               SmallPtrSetImpl<const SCEV *> *LoserRegs) {
+                               SmallPtrSetImpl<const SCEV *> *LoserRegs,
+                               const TargetTransformInfo &TTI) {
   if (LoserRegs && LoserRegs->count(Reg)) {
     Lose();
     return;
   }
   if (Regs.insert(Reg).second) {
-    RateRegister(Reg, Regs, L, SE, DT);
+    RateRegister(Reg, Regs, L, SE, DT, TTI);
     if (LoserRegs && isLoser())
       LoserRegs->insert(Reg);
   }
@@ -1279,7 +1300,7 @@
       Lose();
       return;
     }
-    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
+    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs, TTI);
     if (isLoser())
       return;
   }
@@ -1288,7 +1309,7 @@
       Lose();
      return;
     }
-    RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
+    RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs, TTI);
     if (isLoser())
       return;
   }
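The RateRegister change above is the heart of the patch: an affine address
recurrence now costs nothing when its start is loop-invariant (but not
constant), its step is constant, and the target can fold the pointer update
into a post-indexed access. A loop of the following shape, purely
illustrative, is the intended beneficiary; the recurrence for 'p' is
{p0,+,4} and the bump is absorbed by the load itself:

    // On a target where shouldFavorPostInc() holds and a post-indexed
    // i32 load is legal, the addrec for 'p' is now rated with
    // LoopCost = 0 rather than 1.
    int sum(const int *p, int n) {
      int s = 0;
      for (int i = 0; i != n; ++i)
        s += *p++;   // load, then post-increment the address
      return s;
    }
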
@@ -3465,12 +3486,45 @@
   return S;
 }
 
+/// Return true if the SCEV represents a value that may end up as a
+/// post-increment operation.
+static bool mayUsePostIncMode(const TargetTransformInfo &TTI,
+                              LSRUse &LU, const SCEV *S, const Loop *L,
+                              ScalarEvolution &SE) {
+  if (LU.Kind != LSRUse::Address ||
+      !LU.AccessTy.getType()->isIntOrIntVectorTy())
+    return false;
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+  if (!AR)
+    return false;
+  const SCEV *LoopStep = AR->getStepRecurrence(SE);
+  if (!isa<SCEVConstant>(LoopStep))
+    return false;
+  if (LU.AccessTy.getType()->getScalarSizeInBits() !=
+      LoopStep->getType()->getScalarSizeInBits())
+    return false;
+  // Check if a post-indexed load/store can be used.
+  if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
+      TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
+    const SCEV *LoopStart = AR->getStart();
+    if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))
+      return true;
+  }
+  return false;
+}
+
 /// \brief Helper function for LSRInstance::GenerateReassociations.
 void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base,
                                              unsigned Depth, size_t Idx,
                                              bool IsScaledReg) {
   const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+  // Don't generate reassociations for the base register of a value that
+  // may generate a post-increment operator. The reason is that the
+  // reassociations cause extra base+register formulae to be created,
+  // and possibly chosen, but the post-increment is more efficient.
+  if (TTI.shouldFavorPostInc() && mayUsePostIncMode(TTI, LU, BaseReg, L, SE))
+    return;
   SmallVector<const SCEV *, 8> AddOps;
   const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
   if (Remainder)
@@ -4039,6 +4093,9 @@
       NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
       if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
                       LU.Kind, LU.AccessTy, NewF)) {
+        if (TTI.shouldFavorPostInc() &&
+            mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE))
+          continue;
         if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
           continue;
         NewF = F;
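The last two hunks keep competing formulae away from post-increment
candidates. The effect of the GenerateReassociationsImpl guard is easiest to
see on a simple strided store; the pair of loops below is purely illustrative
and assumes a target where shouldFavorPostInc() holds:

    // Both loops store n zeros. Reassociating {base,+,4} into
    // base + {0,+,4} corresponds to the first shape, which keeps two
    // values (base and index) live; the guard preserves the second
    // shape, whose single pointer recurrence a post-indexed store
    // consumes directly.
    void baseIndexForm(int *base, int n) {
      for (int i = 0; i != n; ++i)
        base[i] = 0;             // address = base + 4*i
    }

    void postIncForm(int *base, int n) {
      for (int *p = base, *e = base + n; p != e; ++p)
        *p = 0;                  // address recurrence {base,+,4}
    }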