Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -281,6 +281,9 @@ /// target-independent defaults. void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; + /// \brief Get target-customized default threshold for loop rotation. + unsigned getLoopRotationDefaultThreshold() const; + /// @} /// \name Scalar Target Information @@ -599,6 +602,7 @@ virtual bool isSourceOfDivergence(const Value *V) = 0; virtual bool isLoweredToCall(const Function *F) = 0; virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0; + virtual unsigned getLoopRotationDefaultThreshold() const = 0; virtual bool isLegalAddImmediate(int64_t Imm) = 0; virtual bool isLegalICmpImmediate(int64_t Imm) = 0; virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, @@ -728,6 +732,9 @@ void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) override { return Impl.getUnrollingPreferences(L, UP); } + unsigned getLoopRotationDefaultThreshold() const override { + return Impl.getLoopRotationDefaultThreshold(); + } bool isLegalAddImmediate(int64_t Imm) override { return Impl.isLegalAddImmediate(Imm); } Index: include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- include/llvm/Analysis/TargetTransformInfoImpl.h +++ include/llvm/Analysis/TargetTransformInfoImpl.h @@ -195,6 +195,8 @@ void getUnrollingPreferences(Loop *, TTI::UnrollingPreferences &) {} + unsigned getLoopRotationDefaultThreshold() const { return 16; } + bool isLegalAddImmediate(int64_t Imm) { return false; } bool isLegalICmpImmediate(int64_t Imm) { return false; } Index: include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- include/llvm/CodeGen/BasicTTIImpl.h +++ include/llvm/CodeGen/BasicTTIImpl.h @@ -271,6 +271,8 @@ 
UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps; } + unsigned getLoopRotationDefaultThreshold() const { return 16; } + /// @} /// \name Vector TTI Implementations Index: lib/Analysis/TargetTransformInfo.cpp =================================================================== --- lib/Analysis/TargetTransformInfo.cpp +++ lib/Analysis/TargetTransformInfo.cpp @@ -96,6 +96,10 @@ return TTIImpl->getUnrollingPreferences(L, UP); } +unsigned TargetTransformInfo::getLoopRotationDefaultThreshold() const { + return TTIImpl->getLoopRotationDefaultThreshold(); +} + bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const { return TTIImpl->isLegalAddImmediate(Imm); } Index: lib/Target/X86/X86TargetTransformInfo.h =================================================================== --- lib/Target/X86/X86TargetTransformInfo.h +++ lib/Target/X86/X86TargetTransformInfo.h @@ -53,6 +53,7 @@ /// \name Scalar TTI Implementations /// @{ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); + unsigned getLoopRotationDefaultThreshold() const; /// @} Index: lib/Target/X86/X86TargetTransformInfo.cpp =================================================================== --- lib/Target/X86/X86TargetTransformInfo.cpp +++ lib/Target/X86/X86TargetTransformInfo.cpp @@ -1486,3 +1486,7 @@ // correct. return (CallerBits & CalleeBits) == CalleeBits; } + +unsigned X86TTIImpl::getLoopRotationDefaultThreshold() const { + return ST->getCPU() == "lakemont" ? 
2 : 16; +} Index: lib/Transforms/Scalar/LoopRotation.cpp =================================================================== --- lib/Transforms/Scalar/LoopRotation.cpp +++ lib/Transforms/Scalar/LoopRotation.cpp @@ -42,8 +42,8 @@ #define DEBUG_TYPE "loop-rotate" static cl::opt<unsigned> -DefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden, - cl::desc("The default maximum header size for automatic loop rotation")); +RotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden, + cl::desc("The maximum header size for automatic loop rotation")); STATISTIC(NumRotated, "Number of loops rotated"); @@ -562,19 +562,28 @@ return MadeChange; } +/// Choose max header size based on pass parameter, options and target +/// preferences. +static unsigned chooseMaxHeaderSize(int SpecifiedThreshold, + const TargetTransformInfo *TTI) { + if (SpecifiedThreshold == -1) + return RotationThreshold.getNumOccurrences() > 0 + ? RotationThreshold + : TTI->getLoopRotationDefaultThreshold(); + else + return unsigned(SpecifiedThreshold); +} + namespace { class LoopRotate : public LoopPass { - unsigned MaxHeaderSize; + int SpecifiedThreshold; public: static char ID; // Pass ID, replacement for typeid - LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) { + LoopRotate(int SpecifiedMaxHeaderSize = -1) + : LoopPass(ID), SpecifiedThreshold(SpecifiedMaxHeaderSize) { initializeLoopRotatePass(*PassRegistry::getPassRegistry()); - if (SpecifiedMaxHeaderSize == -1) - MaxHeaderSize = DefaultRotationThreshold; - else - MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize); } // LCSSA form makes instruction renaming easier. @@ -597,7 +606,8 @@ auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); auto *SE = SEWP ? 
&SEWP->getSE() : nullptr; - return iterativelyRotateLoop(L, MaxHeaderSize, LI, TTI, AC, DT, SE); + return iterativelyRotateLoop( + L, chooseMaxHeaderSize(SpecifiedThreshold, TTI), LI, TTI, AC, DT, SE); } }; } Index: test/Transforms/LoopRotate/target-default.ll =================================================================== --- /dev/null +++ test/Transforms/LoopRotate/target-default.ll @@ -0,0 +1,39 @@ +; REQUIRES: asserts +; RUN: opt < %s -march=x86 -mcpu=pentium -S -loop-rotate -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=PENTIUM +; RUN: opt < %s -march=x86 -mcpu=lakemont -S -loop-rotate -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=LMT +; RUN: opt < %s -march=x86 -mcpu=pentium -S -loop-rotate -rotation-max-header-size=0 -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=PENTIUM-OPT +; RUN: opt < %s -march=x86 -mcpu=lakemont -S -loop-rotate -rotation-max-header-size=16 -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=LMT-OPT + +; Loop should be rotated for Pentium but not for Lakemont. +; PENTIUM: rotating Loop at depth 1 +; LMT-NOT: rotating Loop at depth 1 + +; Specifying -rotation-max-header-size should override the default +; target threshold. +; PENTIUM-OPT-NOT: rotating Loop at depth 1 +; LMT-OPT: rotating Loop at depth 1 + +target triple = "i386-unknown-linux-gnu" + +declare void @use(i32*, i32) + +define void @test(i32* %x, i32 %y) { +entry: + br label %for.cond + +for.cond: + %x.addr.0 = phi i32* [ %x, %entry ], [ %incdec.ptr, %for.body ] + %0 = load i32, i32* %x.addr.0, align 4 + %cmp = icmp sgt i32 %0, 0 + %cmp1 = icmp sgt i32 %y, 0 + %or.cond = and i1 %cmp, %cmp1 + br i1 %or.cond, label %for.body, label %for.end + +for.body: + tail call void @use(i32* %x.addr.0, i32 %y) + %incdec.ptr = getelementptr inbounds i32, i32* %x.addr.0, i64 1 + br label %for.cond + +for.end: + ret void +}