Make the default loop-rotation header-size threshold target-configurable.

* Adds TargetTransformInfo::getLoopRotationDefaultThreshold(). The generic
  implementations return 16; the X86 implementation returns 2 when the
  subtarget CPU is "lakemont" and 16 otherwise.
* createLoopRotatePass() now takes Optional<unsigned> instead of an int with
  -1 as the "not specified" sentinel.
* LoopRotate picks its threshold per loop, in this order: the value passed to
  the pass constructor, then -rotation-max-header-size if it was given on the
  command line, then the target default.
* Adds test/Transforms/LoopRotate/target-default.ll covering the target
  default and the command-line override.

Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -281,6 +281,9 @@
   /// target-independent defaults.
   void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
 
+  /// \brief Get target-customized default threshold for loop rotation.
+  unsigned getLoopRotationDefaultThreshold() const;
+
   /// @}
 
   /// \name Scalar Target Information
@@ -599,6 +602,7 @@
   virtual bool isSourceOfDivergence(const Value *V) = 0;
   virtual bool isLoweredToCall(const Function *F) = 0;
   virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0;
+  virtual unsigned getLoopRotationDefaultThreshold() const = 0;
   virtual bool isLegalAddImmediate(int64_t Imm) = 0;
   virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
@@ -728,6 +732,9 @@
   void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) override {
     return Impl.getUnrollingPreferences(L, UP);
   }
+  unsigned getLoopRotationDefaultThreshold() const override {
+    return Impl.getLoopRotationDefaultThreshold();
+  }
   bool isLegalAddImmediate(int64_t Imm) override {
     return Impl.isLegalAddImmediate(Imm);
   }
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -195,6 +195,8 @@
 
   void getUnrollingPreferences(Loop *, TTI::UnrollingPreferences &) {}
 
+  unsigned getLoopRotationDefaultThreshold() const { return 16; }
+
   bool isLegalAddImmediate(int64_t Imm) { return false; }
 
   bool isLegalICmpImmediate(int64_t Imm) { return false; }
Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -271,6 +271,8 @@
     UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
   }
 
+  unsigned getLoopRotationDefaultThreshold() const { return 16; }
+
   /// @}
 
   /// \name Vector TTI Implementations
Index: include/llvm/Transforms/Scalar.h
===================================================================
--- include/llvm/Transforms/Scalar.h
+++ include/llvm/Transforms/Scalar.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_TRANSFORMS_SCALAR_H
 #define LLVM_TRANSFORMS_SCALAR_H
 
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
 #include <functional>
 
@@ -185,7 +186,7 @@
 //
 // LoopRotate - This pass is a simple loop rotating pass.
 //
-Pass *createLoopRotatePass(int MaxHeaderSize = -1);
+Pass *createLoopRotatePass(Optional<unsigned> MaxHeaderSize = None);
 
 //===----------------------------------------------------------------------===//
 //
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -96,6 +96,10 @@
   return TTIImpl->getUnrollingPreferences(L, UP);
 }
 
+unsigned TargetTransformInfo::getLoopRotationDefaultThreshold() const {
+  return TTIImpl->getLoopRotationDefaultThreshold();
+}
+
 bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
   return TTIImpl->isLegalAddImmediate(Imm);
 }
Index: lib/Target/X86/X86TargetTransformInfo.h
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.h
+++ lib/Target/X86/X86TargetTransformInfo.h
@@ -53,6 +53,7 @@
   /// \name Scalar TTI Implementations
   /// @{
   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+  unsigned getLoopRotationDefaultThreshold() const;
 
   /// @}
 
Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1486,3 +1486,7 @@
   // correct.
   return (CallerBits & CalleeBits) == CalleeBits;
 }
+
+unsigned X86TTIImpl::getLoopRotationDefaultThreshold() const {
+  return ST->getCPU() == "lakemont" ? 2 : 16;
+}
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -250,7 +250,7 @@
     return;
   }
   // Rotate Loop - disable header duplication at -Oz
-  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+  MPM.add(createLoopRotatePass(SizeLevel == 2 ? Optional<unsigned>(0) : None));
   MPM.add(createLICMPass());                  // Hoist loop invariants
   MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
   MPM.add(createCFGSimplificationPass());
@@ -470,7 +470,7 @@
   // Re-rotate loops in all our loop nests. These may have fallout out of
   // rotated form due to GVN or other transformations, and the vectorizer relies
   // on the rotated form. Disable header duplication at -Oz.
-  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+  MPM.add(createLoopRotatePass(SizeLevel == 2 ? Optional<unsigned>(0) : None));
 
   // Distribute loops to allow partial vectorization. I.e. isolate dependences
   // into separate loop that would otherwise inhibit vectorization.
Index: lib/Transforms/Scalar/LoopRotation.cpp
===================================================================
--- lib/Transforms/Scalar/LoopRotation.cpp
+++ lib/Transforms/Scalar/LoopRotation.cpp
@@ -42,8 +42,8 @@
 #define DEBUG_TYPE "loop-rotate"
 
 static cl::opt<unsigned>
-DefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden,
-       cl::desc("The default maximum header size for automatic loop rotation"));
+RotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden,
+       cl::desc("The maximum header size for automatic loop rotation"));
 
 STATISTIC(NumRotated, "Number of loops rotated");
 
@@ -562,19 +562,30 @@
   return MadeChange;
 }
 
+/// Choose the maximum loop header size eligible for rotation.
+///
+/// Precedence, highest first: the threshold passed to the pass constructor,
+/// then -rotation-max-header-size if it was given on the command line, and
+/// finally the target's default from TargetTransformInfo.
+static unsigned chooseMaxHeaderSize(Optional<unsigned> SpecifiedThreshold,
+                                    const TargetTransformInfo *TTI) {
+  if (SpecifiedThreshold.hasValue())
+    return *SpecifiedThreshold;
+  return RotationThreshold.getNumOccurrences() > 0
+             ? RotationThreshold
+             : TTI->getLoopRotationDefaultThreshold();
+}
+
 namespace {
 
 class LoopRotate : public LoopPass {
-  unsigned MaxHeaderSize;
+  Optional<unsigned> SpecifiedThreshold;
 
 public:
   static char ID; // Pass ID, replacement for typeid
-  LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
+  LoopRotate(Optional<unsigned> SpecifiedMaxHeaderSize = None)
+      : LoopPass(ID), SpecifiedThreshold(SpecifiedMaxHeaderSize) {
     initializeLoopRotatePass(*PassRegistry::getPassRegistry());
-    if (SpecifiedMaxHeaderSize == -1)
-      MaxHeaderSize = DefaultRotationThreshold;
-    else
-      MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
   }
 
   // LCSSA form makes instruction renaming easier.
@@ -597,7 +608,8 @@
     auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
     auto *SE = SEWP ? &SEWP->getSE() : nullptr;
 
-    return iterativelyRotateLoop(L, MaxHeaderSize, LI, TTI, AC, DT, SE);
+    return iterativelyRotateLoop(
+        L, chooseMaxHeaderSize(SpecifiedThreshold, TTI), LI, TTI, AC, DT, SE);
   }
 };
 }
@@ -609,6 +621,6 @@
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
 
-Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
+Pass *llvm::createLoopRotatePass(Optional<unsigned> MaxHeaderSize) {
   return new LoopRotate(MaxHeaderSize);
 }
Index: test/Transforms/LoopRotate/target-default.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopRotate/target-default.ll
@@ -0,0 +1,39 @@
+; REQUIRES: asserts
+; RUN: opt < %s -march=x86 -mcpu=pentium -S -loop-rotate -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=PENTIUM
+; RUN: opt < %s -march=x86 -mcpu=lakemont -S -loop-rotate -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=LMT
+; RUN: opt < %s -march=x86 -mcpu=pentium -S -loop-rotate -rotation-max-header-size=0 -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=PENTIUM-OPT
+; RUN: opt < %s -march=x86 -mcpu=lakemont -S -loop-rotate -rotation-max-header-size=16 -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=LMT-OPT
+
+; Loop should be rotated for Pentium but not for Lakemont.
+; PENTIUM: rotating Loop at depth 1
+; LMT-NOT: rotating Loop at depth 1
+
+; Specification of -rotation-max-header-size should suppress default
+; target threshold.
+; PENTIUM-OPT-NOT: rotating Loop at depth 1
+; LMT-OPT: rotating Loop at depth 1
+
+target triple = "i386-unknown-linux-gnu"
+
+declare void @use(i32*, i32)
+
+define void @test(i32* %x, i32 %y) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %x.addr.0 = phi i32* [ %x, %entry ], [ %incdec.ptr, %for.body ]
+  %0 = load i32, i32* %x.addr.0, align 4
+  %cmp = icmp sgt i32 %0, 0
+  %cmp1 = icmp sgt i32 %y, 0
+  %or.cond = and i1 %cmp, %cmp1
+  br i1 %or.cond, label %for.body, label %for.end
+
+for.body:
+  tail call void @use(i32* %x.addr.0, i32 %y)
+  %incdec.ptr = getelementptr inbounds i32, i32* %x.addr.0, i64 1
+  br label %for.cond
+
+for.end:
+  ret void
+}