Add a TargetTransformInfo hook that supplies the default loop-rotation
threshold.

getLoopRotationDefaultThreshold() returns 16 in the generic TTI
implementations; the X86 override returns 2 when the CPU is "lakemont" and
defers to the generic implementation otherwise. LoopRotate asks TTI for the
threshold unless -rotation-max-header-size was given on the command line or
createLoopRotatePass() received an explicit value. The createLoopRotatePass()
parameter changes from int (-1 meaning "unset") to an Optional defaulting to
None.

NOTE(review): this copy of the patch had lost every angle-bracketed span. The
template arguments (cl::opt<unsigned>, Optional<unsigned>, the analysis types
passed to getCachedResult/getAnalysisIfAvailable), the <functional> include and
the whitespace inside context lines were restored by hand - confirm them
against the tree before applying.

Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -295,6 +295,12 @@
   /// target-independent defaults.
   void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
 
+  /// \brief Get target-customized default threshold for loop rotation.
+  ///
+  /// LoopRotate uses this value as its maximum header size unless one is
+  /// given via -rotation-max-header-size or createLoopRotatePass().
+  unsigned getLoopRotationDefaultThreshold() const;
+
   /// @}
 
   /// \name Scalar Target Information
@@ -631,6 +637,7 @@
   virtual bool isSourceOfDivergence(const Value *V) = 0;
   virtual bool isLoweredToCall(const Function *F) = 0;
   virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0;
+  virtual unsigned getLoopRotationDefaultThreshold() const = 0;
   virtual bool isLegalAddImmediate(int64_t Imm) = 0;
   virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
@@ -769,6 +776,9 @@
   void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) override {
     return Impl.getUnrollingPreferences(L, UP);
   }
+  unsigned getLoopRotationDefaultThreshold() const override {
+    return Impl.getLoopRotationDefaultThreshold();
+  }
   bool isLegalAddImmediate(int64_t Imm) override {
     return Impl.isLegalAddImmediate(Imm);
   }
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -197,6 +197,8 @@
 
   void getUnrollingPreferences(Loop *, TTI::UnrollingPreferences &) {}
 
+  unsigned getLoopRotationDefaultThreshold() const { return 16; }
+
   bool isLegalAddImmediate(int64_t Imm) { return false; }
 
   bool isLegalICmpImmediate(int64_t Imm) { return false; }
Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -273,6 +273,8 @@
     UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
   }
 
+  unsigned getLoopRotationDefaultThreshold() const { return 16; }
+
   /// @}
 
   /// \name Vector TTI Implementations
Index: include/llvm/Transforms/Scalar.h
===================================================================
--- include/llvm/Transforms/Scalar.h
+++ include/llvm/Transforms/Scalar.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_TRANSFORMS_SCALAR_H
 #define LLVM_TRANSFORMS_SCALAR_H
 
+#include "llvm/ADT/Optional.h"
 #include <functional>
 
 namespace llvm {
@@ -183,7 +184,7 @@
 //
 // LoopRotate - This pass is a simple loop rotating pass.
 //
-Pass *createLoopRotatePass(int MaxHeaderSize = -1);
+Pass *createLoopRotatePass(Optional<unsigned> MaxHeaderSize = None);
 
 //===----------------------------------------------------------------------===//
 //
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -101,6 +101,10 @@
   return TTIImpl->getUnrollingPreferences(L, UP);
 }
 
+unsigned TargetTransformInfo::getLoopRotationDefaultThreshold() const {
+  return TTIImpl->getLoopRotationDefaultThreshold();
+}
+
 bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
   return TTIImpl->isLegalAddImmediate(Imm);
 }
Index: lib/Target/X86/X86TargetTransformInfo.h
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.h
+++ lib/Target/X86/X86TargetTransformInfo.h
@@ -53,6 +53,7 @@
   /// \name Scalar TTI Implementations
   /// @{
   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+  unsigned getLoopRotationDefaultThreshold() const;
 
   /// @}
 
Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1587,3 +1587,12 @@
   // correct.
   return (CallerBits & CalleeBits) == CalleeBits;
 }
+
+unsigned X86TTIImpl::getLoopRotationDefaultThreshold() const {
+  // A loop rotation threshold of 2 shows the best performance and code size
+  // results on the benchmarks for Lakemont.
+  if (ST->getCPU() == "lakemont")
+    return 2;
+  // Every other CPU keeps the generic default instead of repeating its value.
+  return BaseT::getLoopRotationDefaultThreshold();
+}
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -233,7 +233,7 @@
   MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
   MPM.add(createReassociatePass());           // Reassociate expressions
   // Rotate Loop - disable header duplication at -Oz
-  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+  MPM.add(createLoopRotatePass(SizeLevel == 2 ? Optional<unsigned>(0) : None));
   MPM.add(createLICMPass());                  // Hoist loop invariants
   MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
   MPM.add(createCFGSimplificationPass());
@@ -457,7 +457,7 @@
   // Re-rotate loops in all our loop nests. These may have fallout out of
   // rotated form due to GVN or other transformations, and the vectorizer relies
   // on the rotated form. Disable header duplication at -Oz.
-  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+  MPM.add(createLoopRotatePass(SizeLevel == 2 ? Optional<unsigned>(0) : None));
 
   // Distribute loops to allow partial vectorization.  I.e. isolate dependences
   // into separate loop that would otherwise inhibit vectorization.  This is
Index: lib/Transforms/Scalar/LoopRotation.cpp
===================================================================
--- lib/Transforms/Scalar/LoopRotation.cpp
+++ lib/Transforms/Scalar/LoopRotation.cpp
@@ -43,9 +43,9 @@
 
 #define DEBUG_TYPE "loop-rotate"
 
-static cl::opt<unsigned> DefaultRotationThreshold(
+static cl::opt<unsigned> RotationThreshold(
     "rotation-max-header-size", cl::init(16), cl::Hidden,
-    cl::desc("The default maximum header size for automatic loop rotation"));
+    cl::desc("The maximum header size for automatic loop rotation"));
 
 STATISTIC(NumRotated, "Number of loops rotated");
 
@@ -625,7 +625,10 @@
   // Optional analyses.
   auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F);
   auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F);
-  LoopRotate LR(DefaultRotationThreshold, LI, TTI, AC, DT, SE);
+  LoopRotate LR(RotationThreshold.getNumOccurrences() == 0
+                    ? TTI->getLoopRotationDefaultThreshold()
+                    : RotationThreshold,
+                LI, TTI, AC, DT, SE);
 
   bool Changed = LR.processLoop(&L);
   if (!Changed)
@@ -637,15 +640,20 @@
 
 class LoopRotateLegacyPass : public LoopPass {
   unsigned MaxHeaderSize;
+  // True when neither the constructor argument nor the command-line option
+  // supplies a threshold, so the TTI default is used when a loop is processed.
+  bool UseDefaultMHS;
 
 public:
   static char ID; // Pass ID, replacement for typeid
-  LoopRotateLegacyPass(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
+  LoopRotateLegacyPass(Optional<unsigned> SpecifiedMaxHeaderSize = None)
+      : LoopPass(ID) {
     initializeLoopRotateLegacyPassPass(*PassRegistry::getPassRegistry());
-    if (SpecifiedMaxHeaderSize == -1)
-      MaxHeaderSize = DefaultRotationThreshold;
-    else
-      MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
+
+    MaxHeaderSize = SpecifiedMaxHeaderSize.hasValue() ? *SpecifiedMaxHeaderSize
+                                                      : RotationThreshold;
+    UseDefaultMHS = !SpecifiedMaxHeaderSize.hasValue() &&
+                    RotationThreshold.getNumOccurrences() == 0;
   }
 
   // LCSSA form makes instruction renaming easier.
@@ -667,7 +675,9 @@
     auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
     auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
     auto *SE = SEWP ? &SEWP->getSE() : nullptr;
-    LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE);
+    LoopRotate LR(UseDefaultMHS ? TTI->getLoopRotationDefaultThreshold()
+                                : MaxHeaderSize,
+                  LI, TTI, AC, DT, SE);
     return LR.processLoop(L);
   }
 };
@@ -682,6 +692,6 @@
 INITIALIZE_PASS_END(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false,
                     false)
 
-Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
+Pass *llvm::createLoopRotatePass(Optional<unsigned> MaxHeaderSize) {
   return new LoopRotateLegacyPass(MaxHeaderSize);
 }
Index: test/Transforms/LoopRotate/target-default.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopRotate/target-default.ll
@@ -0,0 +1,39 @@
+; REQUIRES: asserts
+; RUN: opt < %s -march=x86 -mcpu=pentium -S -loop-rotate -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=PENTIUM
+; RUN: opt < %s -march=x86 -mcpu=lakemont -S -loop-rotate -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=LMT
+; RUN: opt < %s -march=x86 -mcpu=pentium -S -loop-rotate -rotation-max-header-size=0 -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=PENTIUM-OPT
+; RUN: opt < %s -march=x86 -mcpu=lakemont -S -loop-rotate -rotation-max-header-size=16 -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=LMT-OPT
+
+; Loop should be rotated for Pentium but not for Lakemont.
+; PENTIUM: rotating Loop at depth 1
+; LMT-NOT: rotating Loop at depth 1
+
+; Specification of -rotation-max-header-size should suppress default
+; target threshold.
+; PENTIUM-OPT-NOT: rotating Loop at depth 1
+; LMT-OPT: rotating Loop at depth 1
+
+target triple = "i386-unknown-linux-gnu"
+
+declare void @use(i32*, i32)
+
+define void @test(i32* %x, i32 %y) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %x.addr.0 = phi i32* [ %x, %entry ], [ %incdec.ptr, %for.body ]
+  %0 = load i32, i32* %x.addr.0, align 4
+  %cmp = icmp sgt i32 %0, 0
+  %cmp1 = icmp sgt i32 %y, 0
+  %or.cond = and i1 %cmp, %cmp1
+  br i1 %or.cond, label %for.body, label %for.end
+
+for.body:
+  tail call void @use(i32* %x.addr.0, i32 %y)
+  %incdec.ptr = getelementptr inbounds i32, i32* %x.addr.0, i64 1
+  br label %for.cond
+
+for.end:
+  ret void
+}