Diff 61086

include/llvm/Analysis/TargetTransformInfo.h

Show First 20 Lines • Show All 289 Lines • ▼ Show 20 Lines	struct UnrollingPreferences {
bool Force;		bool Force;
};		};

/// \brief Get target-customized preferences for the generic loop unrolling		/// \brief Get target-customized preferences for the generic loop unrolling
/// transformation. The caller will initialize UP with the current		/// transformation. The caller will initialize UP with the current
/// target-independent defaults.		/// target-independent defaults.
void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;		void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;

		/// \brief Get target-customized default threshold for loop rotation.
		hfinkel (inline comment; unsubmitted, not done): Please explain here what this threshold actually does (I imagine that this is the maximum header size). Unfortunately, LoopRotate barely documents what it does, so we might need to improve that in order for this to make sense.
		unsigned getLoopRotationDefaultThreshold() const;

/// @}		/// @}

/// \name Scalar Target Information		/// \name Scalar Target Information
/// @{		/// @{

/// \brief Flags indicating the kind of support for population count.		/// \brief Flags indicating the kind of support for population count.
///		///
/// Compared to the SW implementation, HW support is supposed to		/// Compared to the SW implementation, HW support is supposed to
▲ Show 20 Lines • Show All 320 Lines • ▼ Show 20 Lines	virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys) = 0;		ArrayRef<Type *> ParamTys) = 0;
virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,		virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<const Value *> Arguments) = 0;		ArrayRef<const Value *> Arguments) = 0;
virtual int getUserCost(const User *U) = 0;		virtual int getUserCost(const User *U) = 0;
virtual bool hasBranchDivergence() = 0;		virtual bool hasBranchDivergence() = 0;
virtual bool isSourceOfDivergence(const Value *V) = 0;		virtual bool isSourceOfDivergence(const Value *V) = 0;
virtual bool isLoweredToCall(const Function *F) = 0;		virtual bool isLoweredToCall(const Function *F) = 0;
virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0;		virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0;
		virtual unsigned getLoopRotationDefaultThreshold() const = 0;
virtual bool isLegalAddImmediate(int64_t Imm) = 0;		virtual bool isLegalAddImmediate(int64_t Imm) = 0;
virtual bool isLegalICmpImmediate(int64_t Imm) = 0;		virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,		virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,		int64_t BaseOffset, bool HasBaseReg,
int64_t Scale,		int64_t Scale,
unsigned AddrSpace) = 0;		unsigned AddrSpace) = 0;
virtual bool isLegalMaskedStore(Type *DataType) = 0;		virtual bool isLegalMaskedStore(Type *DataType) = 0;
virtual bool isLegalMaskedLoad(Type *DataType) = 0;		virtual bool isLegalMaskedLoad(Type *DataType) = 0;
▲ Show 20 Lines • Show All 122 Lines • ▼ Show 20 Lines	bool isSourceOfDivergence(const Value *V) override {
return Impl.isSourceOfDivergence(V);		return Impl.isSourceOfDivergence(V);
}		}
bool isLoweredToCall(const Function *F) override {		bool isLoweredToCall(const Function *F) override {
return Impl.isLoweredToCall(F);		return Impl.isLoweredToCall(F);
}		}
void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) override {		void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) override {
return Impl.getUnrollingPreferences(L, UP);		return Impl.getUnrollingPreferences(L, UP);
}		}
		unsigned getLoopRotationDefaultThreshold() const override {
		return Impl.getLoopRotationDefaultThreshold();
		}
bool isLegalAddImmediate(int64_t Imm) override {		bool isLegalAddImmediate(int64_t Imm) override {
return Impl.isLegalAddImmediate(Imm);		return Impl.isLegalAddImmediate(Imm);
}		}
bool isLegalICmpImmediate(int64_t Imm) override {		bool isLegalICmpImmediate(int64_t Imm) override {
return Impl.isLegalICmpImmediate(Imm);		return Impl.isLegalICmpImmediate(Imm);
}		}
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,		bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,		bool HasBaseReg, int64_t Scale,
▲ Show 20 Lines • Show All 273 Lines • Show Last 20 Lines

include/llvm/Analysis/TargetTransformInfoImpl.h

Show First 20 Lines • Show All 191 Lines • ▼ Show 20 Lines	if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
Name == "llabs")		Name == "llabs")
return false;		return false;

return true;		return true;
}		}

void getUnrollingPreferences(Loop *, TTI::UnrollingPreferences &) {}		void getUnrollingPreferences(Loop *, TTI::UnrollingPreferences &) {}

		unsigned getLoopRotationDefaultThreshold() const { return 16; }

bool isLegalAddImmediate(int64_t Imm) { return false; }		bool isLegalAddImmediate(int64_t Imm) { return false; }

bool isLegalICmpImmediate(int64_t Imm) { return false; }		bool isLegalICmpImmediate(int64_t Imm) { return false; }

bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,		bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,		bool HasBaseReg, int64_t Scale,
unsigned AddrSpace) {		unsigned AddrSpace) {
// Guess that only reg and reg+reg addressing is allowed. This heuristic is		// Guess that only reg and reg+reg addressing is allowed. This heuristic is
▲ Show 20 Lines • Show All 324 Lines • Show Last 20 Lines

include/llvm/CodeGen/BasicTTIImpl.h

Show First 20 Lines • Show All 267 Lines • ▼ Show 20 Lines	for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
}		}
}		}

// Enable runtime and partial unrolling up to the specified size.		// Enable runtime and partial unrolling up to the specified size.
UP.Partial = UP.Runtime = true;		UP.Partial = UP.Runtime = true;
UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;		UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
}		}

		unsigned getLoopRotationDefaultThreshold() const { return 16; }

/// @}		/// @}

/// \name Vector TTI Implementations		/// \name Vector TTI Implementations
/// @{		/// @{

unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }		unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }

unsigned getRegisterBitWidth(bool Vector) { return 32; }		unsigned getRegisterBitWidth(bool Vector) { return 32; }
▲ Show 20 Lines • Show All 654 Lines • Show Last 20 Lines

include/llvm/Transforms/Scalar.h

	Show All 9 Lines
	// This header file defines prototypes for accessor functions that expose passes			// This header file defines prototypes for accessor functions that expose passes
	// in the Scalar transformations library.			// in the Scalar transformations library.
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#ifndef LLVM_TRANSFORMS_SCALAR_H			#ifndef LLVM_TRANSFORMS_SCALAR_H
	#define LLVM_TRANSFORMS_SCALAR_H			#define LLVM_TRANSFORMS_SCALAR_H

				#include "llvm/ADT/Optional.h"
	#include <functional>			#include <functional>

	namespace llvm {			namespace llvm {

	class BasicBlockPass;			class BasicBlockPass;
	class Function;			class Function;
	class FunctionPass;			class FunctionPass;
	class ModulePass;			class ModulePass;
	▲ Show 20 Lines • Show All 152 Lines • ▼ Show 20 Lines
	// LoopReroll - This pass is a simple loop rerolling pass.			// LoopReroll - This pass is a simple loop rerolling pass.
	//			//
	Pass *createLoopRerollPass();			Pass *createLoopRerollPass();

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	//			//
	// LoopRotate - This pass is a simple loop rotating pass.			// LoopRotate - This pass is a simple loop rotating pass.
	//			//
	Pass *createLoopRotatePass(int MaxHeaderSize = -1);			Pass *createLoopRotatePass(Optional<unsigned> MaxHeaderSize = None);

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	//			//
	// LoopIdiom - This pass recognizes and replaces idioms in loops.			// LoopIdiom - This pass recognizes and replaces idioms in loops.
	//			//
	Pass *createLoopIdiomPass();			Pass *createLoopIdiomPass();

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	▲ Show 20 Lines • Show All 322 Lines • Show Last 20 Lines

lib/Analysis/TargetTransformInfo.cpp

Show First 20 Lines • Show All 95 Lines • ▼ Show 20 Lines	bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return TTIImpl->isLoweredToCall(F);		return TTIImpl->isLoweredToCall(F);
}		}

void TargetTransformInfo::getUnrollingPreferences(		void TargetTransformInfo::getUnrollingPreferences(
Loop *L, UnrollingPreferences &UP) const {		Loop *L, UnrollingPreferences &UP) const {
return TTIImpl->getUnrollingPreferences(L, UP);		return TTIImpl->getUnrollingPreferences(L, UP);
}		}

		unsigned TargetTransformInfo::getLoopRotationDefaultThreshold() const {
		return TTIImpl->getLoopRotationDefaultThreshold();
		}

bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {		bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
return TTIImpl->isLegalAddImmediate(Imm);		return TTIImpl->isLegalAddImmediate(Imm);
}		}

bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {		bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
return TTIImpl->isLegalICmpImmediate(Imm);		return TTIImpl->isLegalICmpImmediate(Imm);
}		}

▲ Show 20 Lines • Show All 336 Lines • Show Last 20 Lines

lib/Target/X86/X86TargetTransformInfo.h

Show First 20 Lines • Show All 47 Lines • ▼ Show 20 Lines	X86TTIImpl(const X86TTIImpl &Arg)
: BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}		: BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
X86TTIImpl(X86TTIImpl &&Arg)		X86TTIImpl(X86TTIImpl &&Arg)
: BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),		: BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
TLI(std::move(Arg.TLI)) {}		TLI(std::move(Arg.TLI)) {}

/// \name Scalar TTI Implementations		/// \name Scalar TTI Implementations
/// @{		/// @{
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);		TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
		unsigned getLoopRotationDefaultThreshold() const;

/// @}		/// @}

/// \name Vector TTI Implementations		/// \name Vector TTI Implementations
/// @{		/// @{

unsigned getNumberOfRegisters(bool Vector);		unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);		unsigned getRegisterBitWidth(bool Vector);
▲ Show 20 Lines • Show All 51 Lines • Show Last 20 Lines

lib/Target/X86/X86TargetTransformInfo.cpp

Show First 20 Lines • Show All 1,581 Lines • ▼ Show 20 Lines	bool X86TTIImpl::areInlineCompatible(const Function *Caller,
const FeatureBitset &CalleeBits =		const FeatureBitset &CalleeBits =
TM.getSubtargetImpl(*Callee)->getFeatureBits();		TM.getSubtargetImpl(*Callee)->getFeatureBits();

// FIXME: This is likely too limiting as it will include subtarget features		// FIXME: This is likely too limiting as it will include subtarget features
// that we might not care about for inlining, but it is conservatively		// that we might not care about for inlining, but it is conservatively
// correct.		// correct.
return (CallerBits & CalleeBits) == CalleeBits;		return (CallerBits & CalleeBits) == CalleeBits;
}		}

		unsigned X86TTIImpl::getLoopRotationDefaultThreshold() const {
		echristo (inline comment; unsubmitted, not done): Explain the change?
		aturetsk (author; inline comment; unsubmitted, not done): Added a comment.
		echristo (inline comment; unsubmitted, not done): Can you elaborate? What did you test on, do you know why? Is it just magic or is it based on something about the CPU?
		aturetsk (author; inline comment; unsubmitted, not done): The positive effect was measured on Spec2000 (code size only, Spec2000 is too large to run on MCU) and Dhrystone (code size and performance). Also tested code size on ULP and code size with performance on Coremark, but there was no effect. The initial idea was to just improve code size for -Os by reducing the threshold, but since performance on Dhrystone improved as well (both for -Os and -O2), it seems to be a good idea to use a smaller threshold for Lakemont regardless of the -O option. So this is based on mere tuning, no special idea behind it.
		hfinkel (inline comment; unsubmitted, not done): If this is something that is useful for a target to tune on a per-model basis, then we should add it to the scheduling model (for example, see LoopMicroOpBufferSize in SchedMachineModel and how that's handled). We really should understand what is going on, however. Loop rotation uses CodeMetrics to calculate the effective size, but that cost model (TTI.getUserCost) has received very little attention. Also, does Lakemont need any kind of scheduling model? It does not seem to have one at all.
		// The loop rotation threshold equal to 2 shows the best performance and code
		// size results on the benchmarks for Lakemont. For other CPUs use the default
		// threshold.
		return ST->getCPU() == "lakemont" ? 2 : 16;
		}

lib/Transforms/IPO/PassManagerBuilder.cpp

Show First 20 Lines • Show All 227 Lines • ▼ Show 20 Lines	void PassManagerBuilder::addFunctionSimplificationPasses(
// Combine silly seq's		// Combine silly seq's
addInstructionCombiningPass(MPM);		addInstructionCombiningPass(MPM);
addExtensionsToPM(EP_Peephole, MPM);		addExtensionsToPM(EP_Peephole, MPM);

MPM.add(createTailCallEliminationPass()); // Eliminate tail calls		MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs		MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions		MPM.add(createReassociatePass()); // Reassociate expressions
// Rotate Loop - disable header duplication at -Oz		// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));		MPM.add(createLoopRotatePass(SizeLevel == 2 ? Optional<unsigned>(0) : None));
MPM.add(createLICMPass()); // Hoist loop invariants		MPM.add(createLICMPass()); // Hoist loop invariants
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));		MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
MPM.add(createCFGSimplificationPass());		MPM.add(createCFGSimplificationPass());
addInstructionCombiningPass(MPM);		addInstructionCombiningPass(MPM);
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars		MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.		MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createLoopDeletionPass()); // Delete dead loops		MPM.add(createLoopDeletionPass()); // Delete dead loops
if (EnableLoopInterchange) {		if (EnableLoopInterchange) {
▲ Show 20 Lines • Show All 207 Lines • ▼ Show 20 Lines	void PassManagerBuilder::populateModulePassManager(
if (RunFloat2Int)		if (RunFloat2Int)
MPM.add(createFloat2IntPass());		MPM.add(createFloat2IntPass());

addExtensionsToPM(EP_VectorizerStart, MPM);		addExtensionsToPM(EP_VectorizerStart, MPM);

// Re-rotate loops in all our loop nests. These may have fallen out of		// Re-rotate loops in all our loop nests. These may have fallen out of
// rotated form due to GVN or other transformations, and the vectorizer relies		// rotated form due to GVN or other transformations, and the vectorizer relies
// on the rotated form. Disable header duplication at -Oz.		// on the rotated form. Disable header duplication at -Oz.
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));		MPM.add(createLoopRotatePass(SizeLevel == 2 ? Optional<unsigned>(0) : None));

// Distribute loops to allow partial vectorization. I.e. isolate dependences		// Distribute loops to allow partial vectorization. I.e. isolate dependences
// into separate loop that would otherwise inhibit vectorization. This is		// into separate loop that would otherwise inhibit vectorization. This is
// currently only performed for loops marked with the metadata		// currently only performed for loops marked with the metadata
// llvm.loop.distribute=true or when -enable-loop-distribute is specified.		// llvm.loop.distribute=true or when -enable-loop-distribute is specified.
MPM.add(createLoopDistributePass(/*ProcessAllLoopsByDefault=*/false));		MPM.add(createLoopDistributePass(/*ProcessAllLoopsByDefault=*/false));

MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));		MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
▲ Show 20 Lines • Show All 383 Lines • Show Last 20 Lines

lib/Transforms/Scalar/LoopRotation.cpp

Show All 37 Lines
#include "llvm/Transforms/Utils/Local.h"		#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"		#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"		#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"		#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;		using namespace llvm;

#define DEBUG_TYPE "loop-rotate"		#define DEBUG_TYPE "loop-rotate"

static cl::opt<unsigned> DefaultRotationThreshold(		static cl::opt<unsigned> RotationThreshold(
"rotation-max-header-size", cl::init(16), cl::Hidden,		"rotation-max-header-size", cl::init(16), cl::Hidden,
cl::desc("The default maximum header size for automatic loop rotation"));		cl::desc("The maximum header size for automatic loop rotation"));

STATISTIC(NumRotated, "Number of loops rotated");		STATISTIC(NumRotated, "Number of loops rotated");

/// A simple loop rotation transformation.		/// A simple loop rotation transformation.
class LoopRotate {		class LoopRotate {
const unsigned MaxHeaderSize;		const unsigned MaxHeaderSize;
LoopInfo *LI;		LoopInfo *LI;
const TargetTransformInfo *TTI;		const TargetTransformInfo *TTI;
▲ Show 20 Lines • Show All 553 Lines • ▼ Show 20 Lines	bool LoopRotate::processLoop(Loop *L) {

return MadeChange;		return MadeChange;
}		}

LoopRotatePass::LoopRotatePass() {}		LoopRotatePass::LoopRotatePass() {}

PreservedAnalyses LoopRotatePass::run(Loop &L, AnalysisManager<Loop> &AM) {		PreservedAnalyses LoopRotatePass::run(Loop &L, AnalysisManager<Loop> &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();		auto &FAM = AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
Function *F = L.getHeader()->getParent();		Function *F = L.getHeader()->getParent();
		zzheng (inline comment; unsubmitted, not done): `if (SpecifiedThreshold.hasValue()) return *SpecifiedThreshold; else { ... }`

auto LI = FAM.getCachedResult<LoopAnalysis>(F);		auto LI = FAM.getCachedResult<LoopAnalysis>(F);
const auto TTI = FAM.getCachedResult<TargetIRAnalysis>(F);		const auto TTI = FAM.getCachedResult<TargetIRAnalysis>(F);
auto AC = FAM.getCachedResult<AssumptionAnalysis>(F);		auto AC = FAM.getCachedResult<AssumptionAnalysis>(F);
assert((LI && TTI && AC) && "Analyses for loop rotation not available");		assert((LI && TTI && AC) && "Analyses for loop rotation not available");

// Optional analyses.		// Optional analyses.
auto DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);		auto DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
auto SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(F);		auto SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(F);
LoopRotate LR(DefaultRotationThreshold, LI, TTI, AC, DT, SE);		LoopRotate LR(RotationThreshold.getNumOccurrences() == 0
		? TTI->getLoopRotationDefaultThreshold()
		: RotationThreshold,
		LI, TTI, AC, DT, SE);

bool Changed = LR.processLoop(&L);		bool Changed = LR.processLoop(&L);
if (!Changed)		if (!Changed)
		zzheng (inline comment; unsubmitted, not done): This indent seems weird; is this the result of clang-format?
return PreservedAnalyses::all();		return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();		return getLoopPassPreservedAnalyses();
}		}

namespace {		namespace {

class LoopRotateLegacyPass : public LoopPass {		class LoopRotateLegacyPass : public LoopPass {
unsigned MaxHeaderSize;		unsigned MaxHeaderSize;
		bool UseDefaultMHS;
		echristo (inline comment; unsubmitted, not done): This all feels awkward. Can you try to rework this a bit?
		aturetsk (author; inline comment; unsubmitted, not done): Do I understand correctly that you don't like having the Optional<> type for a variable which is not a function argument? Unfortunately we can't use the TTI object in the constructor, so we need to somehow keep the information about whether SpecifiedMaxHeaderSize was passed to createLoopRotate or not until chooseMaxHeaderSize is called. I see three ways to do that: (1) Use "Optional<unsigned> SpecifiedThreshold" as it is now. (2) Use "int SpecifiedThreshold", where '-1' would mean the argument wasn't passed to createLoopRotate. That's close to what the patch used to be. (3) Use 'unsigned SpecifiedThreshold' and 'bool IsSpecified'. The bool variable would indicate whether the argument was passed to createLoopRotate or not. Personally, I prefer the first option. BTW, I saw Optional<> used with class fields in other places (e.g. in lib/Transforms/Scalar/LoopUnrollPass.cpp in the LoopUnroll class around line 785).
		zzheng (inline comment; unsubmitted, not done): I think either 1 or 3 will be fine. If we choose 3, we can remove the static unsigned chooseMaxHeaderSize(): `class LoopRotate : public LoopPass { unsigned SpecifiedThreshold; bool IsSpecified; ... LoopRotate(Optional<unsigned> SpecifiedMaxHeaderSize = None) : LoopPass(ID) { ... if (SpecifiedMaxHeaderSize.hasValue()) SpecifiedThreshold = SpecifiedMaxHeaderSize; else SpecifiedThreshold = RotationThreshold; IsSpecified = SpecifiedMaxHeaderSize.hasValue() || RotationThreshold.getNumOccurrences() > 0; } ...` We can even change the if-else above to `SpecifiedThreshold = SpecifiedMaxHeaderSize.hasValue() ? SpecifiedMaxHeaderSize : RotationThreshold;` as long as it conforms with the coding standard. Later: `return iterativelyRotateLoop(L, IsSpecified ? SpecifiedThreshold : TTI->getLoopRotationDefaultThreshold(), LI, TTI, AC, DT, SE);`

public:		public:
static char ID; // Pass ID, replacement for typeid		static char ID; // Pass ID, replacement for typeid
LoopRotateLegacyPass(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {		LoopRotateLegacyPass(Optional<unsigned> SpecifiedMaxHeaderSize = None)
		: LoopPass(ID) {
		zzheng (inline comment; unsubmitted, not done): Can we take this opportunity to change it to `Optional<unsigned> SpecifiedMaxHeaderSize`?
initializeLoopRotateLegacyPassPass(*PassRegistry::getPassRegistry());		initializeLoopRotateLegacyPassPass(*PassRegistry::getPassRegistry());
if (SpecifiedMaxHeaderSize == -1)
MaxHeaderSize = DefaultRotationThreshold;		MaxHeaderSize = SpecifiedMaxHeaderSize.hasValue() ? *SpecifiedMaxHeaderSize
else		: RotationThreshold;
MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);		UseDefaultMHS = !SpecifiedMaxHeaderSize.hasValue() &&
		RotationThreshold.getNumOccurrences() == 0;
}		}

// LCSSA form makes instruction renaming easier.		// LCSSA form makes instruction renaming easier.
void getAnalysisUsage(AnalysisUsage &AU) const override {		void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();		AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();		AU.addRequired<TargetTransformInfoWrapperPass>();
getLoopAnalysisUsage(AU);		getLoopAnalysisUsage(AU);
}		}

bool runOnLoop(Loop *L, LPPassManager &LPM) override {		bool runOnLoop(Loop *L, LPPassManager &LPM) override {
if (skipLoop(L))		if (skipLoop(L))
return false;		return false;
Function &F = *L->getHeader()->getParent();		Function &F = *L->getHeader()->getParent();

auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();		auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);		const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);		auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();		auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;		auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();		auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
auto *SE = SEWP ? &SEWP->getSE() : nullptr;		auto *SE = SEWP ? &SEWP->getSE() : nullptr;
LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE);		LoopRotate LR(UseDefaultMHS ? TTI->getLoopRotationDefaultThreshold()
		: MaxHeaderSize,
		zzheng (inline comment; unsubmitted, not done): Same indent oddness...
		LI, TTI, AC, DT, SE);
return LR.processLoop(L);		return LR.processLoop(L);
}		}
};		};
}		}

char LoopRotateLegacyPass::ID = 0;		char LoopRotateLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops",		INITIALIZE_PASS_BEGIN(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops",
false, false)		false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)		INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopPass)		INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)		INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false,		INITIALIZE_PASS_END(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false,
false)		false)

Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {		Pass *llvm::createLoopRotatePass(Optional<unsigned> MaxHeaderSize) {
return new LoopRotateLegacyPass(MaxHeaderSize);		return new LoopRotateLegacyPass(MaxHeaderSize);
}		}

test/Transforms/LoopRotate/target-default.ll

This file was added.

				; REQUIRES: asserts
				; RUN: opt < %s -march=x86 -mcpu=pentium -S -loop-rotate -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=PENTIUM
				; RUN: opt < %s -march=x86 -mcpu=lakemont -S -loop-rotate -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=LMT
				; RUN: opt < %s -march=x86 -mcpu=pentium -S -loop-rotate -rotation-max-header-size=0 -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=PENTIUM-OPT
				; RUN: opt < %s -march=x86 -mcpu=lakemont -S -loop-rotate -rotation-max-header-size=16 -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=LMT-OPT

				; Loop should be rotated for Pentium but not for Lakemont.
				; PENTIUM: rotating Loop at depth 1
				; LMT-NOT: rotating Loop at depth 1

				; Specification of -rotation-max-header-size should suppress default
				; target threshold.
				; PENTIUM-OPT-NOT: rotating Loop at depth 1
				; LMT-OPT: rotating Loop at depth 1

				target triple = "i386-unknown-linux-gnu"

				declare void @use(i32*, i32)

				define void @test(i32* %x, i32 %y) {
				entry:
				br label %for.cond

				for.cond:
				%x.addr.0 = phi i32* [ %x, %entry ], [ %incdec.ptr, %for.body ]
				%0 = load i32, i32* %x.addr.0, align 4
				%cmp = icmp sgt i32 %0, 0
				%cmp1 = icmp sgt i32 %y, 0
				%or.cond = and i1 %cmp, %cmp1
				br i1 %or.cond, label %for.body, label %for.end

				for.body:
				tail call void @use(i32* %x.addr.0, i32 %y)
				%incdec.ptr = getelementptr inbounds i32, i32* %x.addr.0, i64 1
				br label %for.cond

				for.end:
				ret void
				}

This is an archive of the discontinued LLVM Phabricator instance.

[Loop Rotation] Make default max rotation header size threshold dependent on target CPU
Needs Review · Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 61086

include/llvm/Analysis/TargetTransformInfo.h

include/llvm/Analysis/TargetTransformInfoImpl.h

include/llvm/CodeGen/BasicTTIImpl.h

include/llvm/Transforms/Scalar.h

lib/Analysis/TargetTransformInfo.cpp

lib/Target/X86/X86TargetTransformInfo.h

lib/Target/X86/X86TargetTransformInfo.cpp

lib/Transforms/IPO/PassManagerBuilder.cpp

lib/Transforms/Scalar/LoopRotation.cpp

test/Transforms/LoopRotate/target-default.ll

This is an archive of the discontinued LLVM Phabricator instance.

[Loop Rotation] Make default max rotation header size threshold dependent on target CPU — Needs Review · Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 61086

include/llvm/Analysis/TargetTransformInfo.h

include/llvm/Analysis/TargetTransformInfoImpl.h

include/llvm/CodeGen/BasicTTIImpl.h

include/llvm/Transforms/Scalar.h

lib/Analysis/TargetTransformInfo.cpp

lib/Target/X86/X86TargetTransformInfo.h

lib/Target/X86/X86TargetTransformInfo.cpp

lib/Transforms/IPO/PassManagerBuilder.cpp

lib/Transforms/Scalar/LoopRotation.cpp

test/Transforms/LoopRotate/target-default.ll

[Loop Rotation] Make default max rotation header size threshold dependent on target CPU
Needs Review · Public