Diff 254502

llvm/include/llvm/Analysis/TargetTransformInfo.h

Show First 20 Lines • Show All 841 Lines • ▼ Show 20 Lines	public:
/// \return How much before a load we should place the prefetch		/// \return How much before a load we should place the prefetch
/// instruction. This is currently measured in number of		/// instruction. This is currently measured in number of
/// instructions.		/// instructions.
unsigned getPrefetchDistance() const;		unsigned getPrefetchDistance() const;

/// \return Some HW prefetchers can handle accesses up to a certain		/// \return Some HW prefetchers can handle accesses up to a certain
/// constant stride. This is the minimum stride in bytes where it		/// constant stride. This is the minimum stride in bytes where it
/// makes sense to start adding SW prefetches. The default is 1,		/// makes sense to start adding SW prefetches. The default is 1,
/// i.e. prefetch with any stride.		/// i.e. prefetch with any stride. Sometimes prefetching is beneficial
unsigned getMinPrefetchStride() const;		/// even below the HW prefetcher limit, and the arguments provided are
		/// meant to serve as a basis for deciding this for a particular loop:
		/// \param NumMemAccesses Number of memory accesses in the loop.
		/// \param NumStridedMemAccesses Number of the memory accesses that
		/// ScalarEvolution could find a known stride for.
		MeinersburUnsubmitted Done Reply Inline Actions I am confused by the description of `NumPrefetches`. It looks like it is passed the number of different memory accesses in the loop. "Number of prefetches needed for the strided accesses." sounds like a hardware requirement, but it is passed a property of the code. Can you clarify? Meinersbur: I am confused by the description of `NumPrefetches`. It looks like it is passed the number of…
		/// \param NumPrefetches Number of software prefetches that will be emitted
		/// as determined by the addresses involved and the cache line size.
		/// \param HasCall True if the loop contains a call.
		unsigned getMinPrefetchStride(unsigned NumMemAccesses,
		unsigned NumStridedMemAccesses,
		unsigned NumPrefetches,
		bool HasCall) const;

/// \return The maximum number of iterations to prefetch ahead. If		/// \return The maximum number of iterations to prefetch ahead. If
/// the required number of iterations is more than this number, no		/// the required number of iterations is more than this number, no
/// prefetching is performed.		/// prefetching is performed.
unsigned getMaxPrefetchIterationsAhead() const;		unsigned getMaxPrefetchIterationsAhead() const;

		/// \return True if prefetching should also be done for writes.
		bool enableWritePrefetching() const;

/// \return The maximum interleave factor that any transform should try to		/// \return The maximum interleave factor that any transform should try to
/// perform for this target. This number depends on the level of parallelism		/// perform for this target. This number depends on the level of parallelism
/// and the number of execution units in the CPU.		/// and the number of execution units in the CPU.
unsigned getMaxInterleaveFactor(unsigned VF) const;		unsigned getMaxInterleaveFactor(unsigned VF) const;

/// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.		/// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
static OperandValueKind getOperandInfo(Value *V,		static OperandValueKind getOperandInfo(Value *V,
OperandValueProperties &OpProps);		OperandValueProperties &OpProps);
▲ Show 20 Lines • Show All 427 Lines • ▼ Show 20 Lines	public:
/// \return How much before a load we should place the prefetch		/// \return How much before a load we should place the prefetch
/// instruction. This is currently measured in number of		/// instruction. This is currently measured in number of
/// instructions.		/// instructions.
virtual unsigned getPrefetchDistance() const = 0;		virtual unsigned getPrefetchDistance() const = 0;

/// \return Some HW prefetchers can handle accesses up to a certain		/// \return Some HW prefetchers can handle accesses up to a certain
/// constant stride. This is the minimum stride in bytes where it		/// constant stride. This is the minimum stride in bytes where it
/// makes sense to start adding SW prefetches. The default is 1,		/// makes sense to start adding SW prefetches. The default is 1,
/// i.e. prefetch with any stride.		/// i.e. prefetch with any stride. Sometimes prefetching is beneficial
virtual unsigned getMinPrefetchStride() const = 0;		/// even below the HW prefetcher limit, and the arguments provided are
		/// meant to serve as a basis for deciding this for a particular loop.
		virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
		unsigned NumStridedMemAccesses,
		unsigned NumPrefetches,
		bool HasCall) const = 0;
		MeinersburUnsubmitted Done Reply Inline Actions Could you add explanations for the individual parameters to the doxygen comment? Meinersbur: Could you add explanations for the individual parameters to the doxygen comment?

/// \return The maximum number of iterations to prefetch ahead. If		/// \return The maximum number of iterations to prefetch ahead. If
/// the required number of iterations is more than this number, no		/// the required number of iterations is more than this number, no
/// prefetching is performed.		/// prefetching is performed.
virtual unsigned getMaxPrefetchIterationsAhead() const = 0;		virtual unsigned getMaxPrefetchIterationsAhead() const = 0;

		/// \return True if prefetching should also be done for writes.
		virtual bool enableWritePrefetching() const = 0;

virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;		virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
virtual unsigned getArithmeticInstrCost(		virtual unsigned getArithmeticInstrCost(
unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,		unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,		OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,		OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI = nullptr) = 0;		const Instruction *CxtI = nullptr) = 0;
virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,		virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) = 0;		Type *SubTp) = 0;
▲ Show 20 Lines • Show All 362 Lines • ▼ Show 20 Lines	public:
///		///
unsigned getPrefetchDistance() const override {		unsigned getPrefetchDistance() const override {
return Impl.getPrefetchDistance();		return Impl.getPrefetchDistance();
}		}

/// Return the minimum stride necessary to trigger software		/// Return the minimum stride necessary to trigger software
/// prefetching.		/// prefetching.
///		///
unsigned getMinPrefetchStride() const override {		unsigned getMinPrefetchStride(unsigned NumMemAccesses,
return Impl.getMinPrefetchStride();		unsigned NumStridedMemAccesses,
		unsigned NumPrefetches,
		bool HasCall) const override {
		return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
		NumPrefetches, HasCall);
}		}

/// Return the maximum prefetch distance in terms of loop		/// Return the maximum prefetch distance in terms of loop
/// iterations.		/// iterations.
///		///
unsigned getMaxPrefetchIterationsAhead() const override {		unsigned getMaxPrefetchIterationsAhead() const override {
return Impl.getMaxPrefetchIterationsAhead();		return Impl.getMaxPrefetchIterationsAhead();
}		}

		/// \return True if prefetching should also be done for writes.
		bool enableWritePrefetching() const override {
		return Impl.enableWritePrefetching();
		}

unsigned getMaxInterleaveFactor(unsigned VF) override {		unsigned getMaxInterleaveFactor(unsigned VF) override {
return Impl.getMaxInterleaveFactor(VF);		return Impl.getMaxInterleaveFactor(VF);
}		}
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,		unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
unsigned &JTSize,		unsigned &JTSize,
ProfileSummaryInfo *PSI,		ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) override {		BlockFrequencyInfo *BFI) override {
return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);		return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
▲ Show 20 Lines • Show All 286 Lines • Show Last 20 Lines

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Show First 20 Lines • Show All 410 Lines • ▼ Show 20 Lines	llvm::Optional<unsigned> getCacheAssociativity(
case TargetTransformInfo::CacheLevel::L2D:		case TargetTransformInfo::CacheLevel::L2D:
return llvm::Optional<unsigned>();		return llvm::Optional<unsigned>();
}		}

llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");		llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
}		}

unsigned getPrefetchDistance() const { return 0; }		unsigned getPrefetchDistance() const { return 0; }
unsigned getMinPrefetchStride() const { return 1; }		unsigned getMinPrefetchStride(unsigned NumMemAccesses,
		unsigned NumStridedMemAccesses,
		unsigned NumPrefetches,
		bool HasCall) const { return 1; }
unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }		unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
		bool enableWritePrefetching() const { return false; }

unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }		unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,		unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info,		TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info,		TTI::OperandValueKind Opd2Info,
TTI::OperandValueProperties Opd1PropInfo,		TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo,		TTI::OperandValueProperties Opd2PropInfo,
▲ Show 20 Lines • Show All 485 Lines • Show Last 20 Lines

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Show First 20 Lines • Show All 545 Lines • ▼ Show 20 Lines	public:
virtual unsigned getCacheLineSize() const {		virtual unsigned getCacheLineSize() const {
return getST()->getCacheLineSize();		return getST()->getCacheLineSize();
}		}

virtual unsigned getPrefetchDistance() const {		virtual unsigned getPrefetchDistance() const {
return getST()->getPrefetchDistance();		return getST()->getPrefetchDistance();
}		}

virtual unsigned getMinPrefetchStride() const {		virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
return getST()->getMinPrefetchStride();		unsigned NumStridedMemAccesses,
		unsigned NumPrefetches,
		bool HasCall) const {
		return getST()->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
		NumPrefetches, HasCall);
}		}

virtual unsigned getMaxPrefetchIterationsAhead() const {		virtual unsigned getMaxPrefetchIterationsAhead() const {
return getST()->getMaxPrefetchIterationsAhead();		return getST()->getMaxPrefetchIterationsAhead();
}		}

		virtual bool enableWritePrefetching() const {
		return getST()->enableWritePrefetching();
		}

/// @}		/// @}

/// \name Vector TTI Implementations		/// \name Vector TTI Implementations
/// @{		/// @{

unsigned getRegisterBitWidth(bool Vector) const { return 32; }		unsigned getRegisterBitWidth(bool Vector) const { return 32; }

/// Estimate the overhead of scalarizing an instruction. Insert and Extract		/// Estimate the overhead of scalarizing an instruction. Insert and Extract
▲ Show 20 Lines • Show All 1,203 Lines • Show Last 20 Lines

llvm/include/llvm/MC/MCSubtargetInfo.h

Show First 20 Lines • Show All 257 Lines • ▼ Show 20 Lines	public:
///		///
virtual unsigned getPrefetchDistance() const;		virtual unsigned getPrefetchDistance() const;

/// Return the maximum prefetch distance in terms of loop		/// Return the maximum prefetch distance in terms of loop
/// iterations.		/// iterations.
///		///
virtual unsigned getMaxPrefetchIterationsAhead() const;		virtual unsigned getMaxPrefetchIterationsAhead() const;

		/// \return True if prefetching should also be done for writes.
		///
		virtual bool enableWritePrefetching() const;

/// Return the minimum stride necessary to trigger software		/// Return the minimum stride necessary to trigger software
/// prefetching.		/// prefetching.
///		///
virtual unsigned getMinPrefetchStride() const;		virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
		unsigned NumStridedMemAccesses,
		unsigned NumPrefetches,
		bool HasCall) const;
};		};

} // end namespace llvm		} // end namespace llvm

#endif // LLVM_MC_MCSUBTARGETINFO_H		#endif // LLVM_MC_MCSUBTARGETINFO_H

llvm/lib/Analysis/TargetTransformInfo.cpp

Show First 20 Lines • Show All 513 Lines • ▼ Show 20 Lines	llvm::Optional<unsigned> TargetTransformInfo::getCacheAssociativity(
CacheLevel Level) const {		CacheLevel Level) const {
return TTIImpl->getCacheAssociativity(Level);		return TTIImpl->getCacheAssociativity(Level);
}		}

unsigned TargetTransformInfo::getPrefetchDistance() const {		unsigned TargetTransformInfo::getPrefetchDistance() const {
return TTIImpl->getPrefetchDistance();		return TTIImpl->getPrefetchDistance();
}		}

unsigned TargetTransformInfo::getMinPrefetchStride() const {		unsigned TargetTransformInfo::getMinPrefetchStride(unsigned NumMemAccesses,
return TTIImpl->getMinPrefetchStride();		unsigned NumStridedMemAccesses,
		unsigned NumPrefetches,
		bool HasCall) const {
		return TTIImpl->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
		NumPrefetches, HasCall);
}		}

unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {		unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
return TTIImpl->getMaxPrefetchIterationsAhead();		return TTIImpl->getMaxPrefetchIterationsAhead();
}		}

		bool TargetTransformInfo::enableWritePrefetching() const {
		return TTIImpl->enableWritePrefetching();
		}

unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {		unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
return TTIImpl->getMaxInterleaveFactor(VF);		return TTIImpl->getMaxInterleaveFactor(VF);
}		}

TargetTransformInfo::OperandValueKind		TargetTransformInfo::OperandValueKind
TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {		TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
OperandValueKind OpInfo = OK_AnyValue;		OperandValueKind OpInfo = OK_AnyValue;
OpProps = OP_None;		OpProps = OP_None;
▲ Show 20 Lines • Show All 860 Lines • Show Last 20 Lines

llvm/lib/MC/MCSubtargetInfo.cpp

	Show First 20 Lines • Show All 333 Lines • ▼ Show 20 Lines
	unsigned MCSubtargetInfo::getPrefetchDistance() const {			unsigned MCSubtargetInfo::getPrefetchDistance() const {
	return 0;			return 0;
	}			}

	unsigned MCSubtargetInfo::getMaxPrefetchIterationsAhead() const {			unsigned MCSubtargetInfo::getMaxPrefetchIterationsAhead() const {
	return UINT_MAX;			return UINT_MAX;
	}			}

	unsigned MCSubtargetInfo::getMinPrefetchStride() const {			bool MCSubtargetInfo::enableWritePrefetching() const {
				return false;
				}

				unsigned MCSubtargetInfo::getMinPrefetchStride(unsigned NumMemAccesses,
				unsigned NumStridedMemAccesses,
				unsigned NumPrefetches,
				bool HasCall) const {
	return 1;			return 1;
	}			}

llvm/lib/Target/AArch64/AArch64Subtarget.h

Show First 20 Lines • Show All 358 Lines • ▼ Show 20 Lines	public:
bool useRSqrt() const { return UseRSqrt; }		bool useRSqrt() const { return UseRSqrt; }
bool force32BitJumpTables() const { return Force32BitJumpTables; }		bool force32BitJumpTables() const { return Force32BitJumpTables; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }		unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {		unsigned getVectorInsertExtractBaseCost() const {
return VectorInsertExtractBaseCost;		return VectorInsertExtractBaseCost;
}		}
unsigned getCacheLineSize() const override { return CacheLineSize; }		unsigned getCacheLineSize() const override { return CacheLineSize; }
unsigned getPrefetchDistance() const override { return PrefetchDistance; }		unsigned getPrefetchDistance() const override { return PrefetchDistance; }
unsigned getMinPrefetchStride() const override { return MinPrefetchStride; }		unsigned getMinPrefetchStride(unsigned NumMemAccesses,
		unsigned NumStridedMemAccesses,
		unsigned NumPrefetches,
		bool HasCall) const override {
		return MinPrefetchStride;
		}
unsigned getMaxPrefetchIterationsAhead() const override {		unsigned getMaxPrefetchIterationsAhead() const override {
return MaxPrefetchIterationsAhead;		return MaxPrefetchIterationsAhead;
}		}
unsigned getPrefFunctionLogAlignment() const {		unsigned getPrefFunctionLogAlignment() const {
return PrefFunctionLogAlignment;		return PrefFunctionLogAlignment;
}		}
unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; }		unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; }

▲ Show 20 Lines • Show All 136 Lines • Show Last 20 Lines

llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h

Show First 20 Lines • Show All 54 Lines • ▼ Show 20 Lines	public:

/// \name Vector TTI Implementations		/// \name Vector TTI Implementations
/// @{		/// @{

unsigned getNumberOfRegisters(unsigned ClassID) const;		unsigned getNumberOfRegisters(unsigned ClassID) const;
unsigned getRegisterBitWidth(bool Vector) const;		unsigned getRegisterBitWidth(bool Vector) const;

unsigned getCacheLineSize() const override { return 256; }		unsigned getCacheLineSize() const override { return 256; }
unsigned getPrefetchDistance() const override { return 2000; }		unsigned getPrefetchDistance() const override { return 4500; }
unsigned getMinPrefetchStride() const override { return 2048; }		unsigned getMinPrefetchStride(unsigned NumMemAccesses,
		unsigned NumStridedMemAccesses,
		unsigned NumPrefetches,
		bool HasCall) const override;
		bool enableWritePrefetching() const override { return true; }

bool hasDivRemOp(Type *DataType, bool IsSigned);		bool hasDivRemOp(Type *DataType, bool IsSigned);
bool prefersVectorizedAddressing() { return false; }		bool prefersVectorizedAddressing() { return false; }
bool LSRWithInstrQueries() { return true; }		bool LSRWithInstrQueries() { return true; }
bool supportsEfficientVectorElementLoadStore() { return true; }		bool supportsEfficientVectorElementLoadStore() { return true; }
bool enableInterleavedAccessVectorization() { return true; }		bool enableInterleavedAccessVectorization() { return true; }

int getArithmeticInstrCost(		int getArithmeticInstrCost(
▲ Show 20 Lines • Show All 42 Lines • Show Last 20 Lines

llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp

	Show First 20 Lines • Show All 317 Lines • ▼ Show 20 Lines
	unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const {			unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const {
	if (!Vector)			if (!Vector)
	return 64;			return 64;
	if (ST->hasVector())			if (ST->hasVector())
	return 128;			return 128;
	return 0;			return 0;
	}			}

				unsigned SystemZTTIImpl::getMinPrefetchStride(unsigned NumMemAccesses,
				unsigned NumStridedMemAccesses,
				unsigned NumPrefetches,
				bool HasCall) const {
				// Don't prefetch a loop with many far apart accesses.
				if (NumPrefetches > 16)
				return UINT_MAX;

				// Emit prefetch instructions for smaller strides in cases where we think
				// the hardware prefetcher might not be able to keep up.
				if (NumStridedMemAccesses > 32 &&
				NumStridedMemAccesses == NumMemAccesses && !HasCall)
				return 1;

				return ST->hasMiscellaneousExtensions3() ? 8192 : 2048;
				}

	bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {			bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
	EVT VT = TLI->getValueType(DL, DataType);			EVT VT = TLI->getValueType(DL, DataType);
	return (VT.isScalarInteger() && TLI->isTypeLegal(VT));			return (VT.isScalarInteger() && TLI->isTypeLegal(VT));
	}			}

	// Return the bit size for the scalar type or vector element			// Return the bit size for the scalar type or vector element
	// type. getScalarSizeInBits() returns 0 for a pointer type.			// type. getScalarSizeInBits() returns 0 for a pointer type.
	static unsigned getScalarSizeInBits(Type *Ty) {			static unsigned getScalarSizeInBits(Type *Ty) {
	▲ Show 20 Lines • Show All 812 Lines • Show Last 20 Lines

llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp

	Show All 18 Lines
	#include "llvm/Analysis/AssumptionCache.h"			#include "llvm/Analysis/AssumptionCache.h"
	#include "llvm/Analysis/CodeMetrics.h"			#include "llvm/Analysis/CodeMetrics.h"
	#include "llvm/Analysis/LoopInfo.h"			#include "llvm/Analysis/LoopInfo.h"
	#include "llvm/Analysis/OptimizationRemarkEmitter.h"			#include "llvm/Analysis/OptimizationRemarkEmitter.h"
	#include "llvm/Analysis/ScalarEvolution.h"			#include "llvm/Analysis/ScalarEvolution.h"
	#include "llvm/Analysis/ScalarEvolutionExpander.h"			#include "llvm/Analysis/ScalarEvolutionExpander.h"
	#include "llvm/Analysis/ScalarEvolutionExpressions.h"			#include "llvm/Analysis/ScalarEvolutionExpressions.h"
	#include "llvm/Analysis/TargetTransformInfo.h"			#include "llvm/Analysis/TargetTransformInfo.h"
				#include "llvm/CodeGen/TargetLowering.h"
	#include "llvm/IR/CFG.h"			#include "llvm/IR/CFG.h"
	#include "llvm/IR/Dominators.h"			#include "llvm/IR/Dominators.h"
	#include "llvm/IR/Function.h"			#include "llvm/IR/Function.h"
	#include "llvm/IR/Module.h"			#include "llvm/IR/Module.h"
	#include "llvm/Support/CommandLine.h"			#include "llvm/Support/CommandLine.h"
	#include "llvm/Support/Debug.h"			#include "llvm/Support/Debug.h"
	#include "llvm/Transforms/Scalar.h"			#include "llvm/Transforms/Scalar.h"
	#include "llvm/Transforms/Utils/BasicBlockUtils.h"			#include "llvm/Transforms/Utils/BasicBlockUtils.h"
	Show All 21 Lines

	STATISTIC(NumPrefetches, "Number of prefetches inserted");			STATISTIC(NumPrefetches, "Number of prefetches inserted");

	namespace {			namespace {

	/// Loop prefetch implementation class.			/// Loop prefetch implementation class.
	class LoopDataPrefetch {			class LoopDataPrefetch {
	public:			public:
	LoopDataPrefetch(AssumptionCache AC, LoopInfo LI, ScalarEvolution *SE,			LoopDataPrefetch(AssumptionCache AC, DominatorTree DT, LoopInfo *LI,
	const TargetTransformInfo *TTI,			ScalarEvolution SE, const TargetTransformInfo TTI,
	OptimizationRemarkEmitter *ORE)			OptimizationRemarkEmitter *ORE)
	: AC(AC), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}			: AC(AC), DT(DT), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}

	bool run();			bool run();

	private:			private:
	bool runOnLoop(Loop *L);			bool runOnLoop(Loop *L);

	/// Check if the stride of the accesses is large enough to			/// Check if the stride of the accesses is large enough to
	/// warrant a prefetch.			/// warrant a prefetch.
	bool isStrideLargeEnough(const SCEVAddRecExpr *AR);			bool isStrideLargeEnough(const SCEVAddRecExpr *AR, unsigned TargetMinStride);

	unsigned getMinPrefetchStride() {			unsigned getMinPrefetchStride(unsigned NumMemAccesses,
				unsigned NumStridedMemAccesses,
				unsigned NumPrefetches,
				bool HasCall) {
	if (MinPrefetchStride.getNumOccurrences() > 0)			if (MinPrefetchStride.getNumOccurrences() > 0)
	return MinPrefetchStride;			return MinPrefetchStride;
	return TTI->getMinPrefetchStride();			return TTI->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
				NumPrefetches, HasCall);
	}			}

	unsigned getPrefetchDistance() {			unsigned getPrefetchDistance() {
	if (PrefetchDistance.getNumOccurrences() > 0)			if (PrefetchDistance.getNumOccurrences() > 0)
	return PrefetchDistance;			return PrefetchDistance;
	return TTI->getPrefetchDistance();			return TTI->getPrefetchDistance();
	}			}

	unsigned getMaxPrefetchIterationsAhead() {			unsigned getMaxPrefetchIterationsAhead() {
	if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)			if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
	return MaxPrefetchIterationsAhead;			return MaxPrefetchIterationsAhead;
	return TTI->getMaxPrefetchIterationsAhead();			return TTI->getMaxPrefetchIterationsAhead();
	}			}

				bool doPrefetchWrites() {
				if (PrefetchWrites.getNumOccurrences() > 0)
				return PrefetchWrites;
				return TTI->enableWritePrefetching();
				}

	AssumptionCache *AC;			AssumptionCache *AC;
				DominatorTree *DT;
	LoopInfo *LI;			LoopInfo *LI;
	ScalarEvolution *SE;			ScalarEvolution *SE;
	const TargetTransformInfo *TTI;			const TargetTransformInfo *TTI;
	OptimizationRemarkEmitter *ORE;			OptimizationRemarkEmitter *ORE;
	};			};

	/// Legacy class for inserting loop data prefetches.			/// Legacy class for inserting loop data prefetches.
	class LoopDataPrefetchLegacyPass : public FunctionPass {			class LoopDataPrefetchLegacyPass : public FunctionPass {
	public:			public:
	static char ID; // Pass ID, replacement for typeid			static char ID; // Pass ID, replacement for typeid
	LoopDataPrefetchLegacyPass() : FunctionPass(ID) {			LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
	initializeLoopDataPrefetchLegacyPassPass(*PassRegistry::getPassRegistry());			initializeLoopDataPrefetchLegacyPassPass(*PassRegistry::getPassRegistry());
	}			}

	void getAnalysisUsage(AnalysisUsage &AU) const override {			void getAnalysisUsage(AnalysisUsage &AU) const override {
	AU.addRequired<AssumptionCacheTracker>();			AU.addRequired<AssumptionCacheTracker>();
				AU.addRequired<DominatorTreeWrapperPass>();
	AU.addPreserved<DominatorTreeWrapperPass>();			AU.addPreserved<DominatorTreeWrapperPass>();
	AU.addRequired<LoopInfoWrapperPass>();			AU.addRequired<LoopInfoWrapperPass>();
	AU.addPreserved<LoopInfoWrapperPass>();			AU.addPreserved<LoopInfoWrapperPass>();
	AU.addRequired<OptimizationRemarkEmitterWrapperPass>();			AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
	AU.addRequired<ScalarEvolutionWrapperPass>();			AU.addRequired<ScalarEvolutionWrapperPass>();
	AU.addPreserved<ScalarEvolutionWrapperPass>();			AU.addPreserved<ScalarEvolutionWrapperPass>();
	AU.addRequired<TargetTransformInfoWrapperPass>();			AU.addRequired<TargetTransformInfoWrapperPass>();
	}			}
	Show All 12 Lines
	INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)			INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
	INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",			INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
	"Loop Data Prefetch", false, false)			"Loop Data Prefetch", false, false)

	FunctionPass *llvm::createLoopDataPrefetchPass() {			FunctionPass *llvm::createLoopDataPrefetchPass() {
	return new LoopDataPrefetchLegacyPass();			return new LoopDataPrefetchLegacyPass();
	}			}

	bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR) {			bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR,
	unsigned TargetMinStride = getMinPrefetchStride();			unsigned TargetMinStride) {
	// No need to check if any stride goes.			// No need to check if any stride goes.
	if (TargetMinStride <= 1)			if (TargetMinStride <= 1)
	return true;			return true;

	const auto ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE));			const auto ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE));
	// If MinStride is set, don't prefetch unless we can ensure that stride is			// If MinStride is set, don't prefetch unless we can ensure that stride is
	// larger.			// larger.
	if (!ConstStride)			if (!ConstStride)
	return false;			return false;

	unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());			unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
	return TargetMinStride <= AbsStride;			return TargetMinStride <= AbsStride;
	}			}

	PreservedAnalyses LoopDataPrefetchPass::run(Function &F,			PreservedAnalyses LoopDataPrefetchPass::run(Function &F,
	FunctionAnalysisManager &AM) {			FunctionAnalysisManager &AM) {
				DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
	LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);			LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
	ScalarEvolution *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);			ScalarEvolution *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
	AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);			AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
	OptimizationRemarkEmitter *ORE =			OptimizationRemarkEmitter *ORE =
	&AM.getResult<OptimizationRemarkEmitterAnalysis>(F);			&AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
	const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);			const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);

	LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE);			LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
	bool Changed = LDP.run();			bool Changed = LDP.run();

	if (Changed) {			if (Changed) {
	PreservedAnalyses PA;			PreservedAnalyses PA;
	PA.preserve<DominatorTreeAnalysis>();			PA.preserve<DominatorTreeAnalysis>();
	PA.preserve<LoopAnalysis>();			PA.preserve<LoopAnalysis>();
	return PA;			return PA;
	}			}

	return PreservedAnalyses::all();			return PreservedAnalyses::all();
	}			}

	bool LoopDataPrefetchLegacyPass::runOnFunction(Function &F) {			bool LoopDataPrefetchLegacyPass::runOnFunction(Function &F) {
	if (skipFunction(F))			if (skipFunction(F))
	return false;			return false;

				DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
	LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();			LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
	ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();			ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
	AssumptionCache *AC =			AssumptionCache *AC =
	&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);			&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
	OptimizationRemarkEmitter *ORE =			OptimizationRemarkEmitter *ORE =
	&getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();			&getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
	const TargetTransformInfo *TTI =			const TargetTransformInfo *TTI =
	&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);			&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

	LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE);			LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
	return LDP.run();			return LDP.run();
	}			}

	bool LoopDataPrefetch::run() {			bool LoopDataPrefetch::run() {
	// If PrefetchDistance is not set, don't run the pass. This gives an			// If PrefetchDistance is not set, don't run the pass. This gives an
	// opportunity for targets to run this pass for selected subtargets only			// opportunity for targets to run this pass for selected subtargets only
	// (whose TTI sets PrefetchDistance).			// (whose TTI sets PrefetchDistance).
	if (getPrefetchDistance() == 0)			if (getPrefetchDistance() == 0)
	return false;			return false;
	assert(TTI->getCacheLineSize() && "Cache line size is not set for target");			assert(TTI->getCacheLineSize() && "Cache line size is not set for target");

	bool MadeChange = false;			bool MadeChange = false;

	for (Loop I : LI)			for (Loop I : LI)
	for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)			for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
	MadeChange \|= runOnLoop(*L);			MadeChange \|= runOnLoop(*L);

	return MadeChange;			return MadeChange;
	}			}

				/// A record for a potential prefetch made during the initial scan of the
				/// loop. This is used to let a single prefetch target multiple memory accesses.
				struct Prefetch {
				/// The address formula for this prefetch as returned by ScalarEvolution.
				const SCEVAddRecExpr *LSCEVAddRec;
				/// The point of insertion for the prefetch instruction.
				jonpaAuthorUnsubmitted Done Reply Inline Actions The Prefetch struct MemI member is used solely for debug/ORE output. It is currently not guarded by '#ifndef NDEBUG', but maybe it should be, even though it's just one pointer? The debug output could perhaps also be improved; I merely tried to keep what was there while also printing the newly gathered values. The MemI member is not needed for anything else, so if we could change the debug output instead, perhaps it would not be needed at all? jonpa: The Prefetch struct MemI member is used solely for debug/ORE output. It is currently not…
				MeinersburUnsubmitted Not Done Reply Inline Actions I'd appreciate if it was guarded by `#ifndef NDEBUG`. It would make it more explicit and enforcing to be only used in debug builds. Meinersbur: I'd appreciate if it was guarded by `#ifndef NDEBUG`. It would make it more explicit and…
				jonpaAuthorUnsubmitted Done Reply Inline Actions I don't think that's possible given that it's also used by the ORE, or? jonpa: I don't think that's possible given that it's also used by the ORE, or?
				MeinersburUnsubmitted Done Reply Inline Actions Well, then it's not only for debug output as the comment says. Remove the part in parens? I'd appreciate if the other class members had doxygen comments as well. Meinersbur: Well, then it's not only for debug output as the comment says. Remove the part in parens? I'd…
				Instruction *InsertPt;
				/// True if targeting a write memory access.
				bool Writes;
				/// The (first seen) prefetched instruction.
				MeinersburUnsubmitted Done Reply Inline Actions [style] Start parameter names with capital letters (PascalCase) Meinersbur: [style] [[ https://llvm.org/docs/CodingStandards.html#name-types-functions-variables-and…
				Instruction *MemI;
				MeinersburUnsubmitted Done Reply Inline Actions [style] Please add empty lines between methods. Also, add a doxygen comment about what this method and its parameters (and the class itself) are supposed to do. Meinersbur: [style] Please add empty lines between methods. Also, add a doxygen comment about what this…

				/// Constructor to create a new Prefetch for \p I.
				Prefetch(const SCEVAddRecExpr *L, Instruction *I)
				: LSCEVAddRec(L), InsertPt(nullptr), Writes(false), MemI(nullptr) {
				addInstruction(I);
				};

				MeinersburUnsubmitted Done Reply Inline Actions [style] please clang-format the patch. Meinersbur: [style] please clang-format the patch.
				/// Add the instruction \p I to this prefetch. If it's not the first
				/// one, 'InsertPt' and 'Writes' will be updated as required.
				/// \param PtrDiff the known constant address difference to the first added
				/// instruction.
				void addInstruction(Instruction *I, DominatorTree *DT = nullptr,
				int64_t PtrDiff = 0) {
				jonpaAuthorUnsubmitted Done Reply Inline Actions I am trusting DT->findNearestCommonDominator() and DomBB->getTerminator() to do this, but I am not 100% sure that there is always a terminator in each block...? jonpa: I am trusting DT->findNearestCommonDominator() and DomBB->getTerminator() to do this, but I am…
				if (!InsertPt) {
				MemI = I;
				InsertPt = I;
				Writes = isa<StoreInst>(I);
				} else {
				BasicBlock *PrefBB = InsertPt->getParent();
				BasicBlock *InsBB = I->getParent();
				if (PrefBB != InsBB) {
				BasicBlock *DomBB = DT->findNearestCommonDominator(PrefBB, InsBB);
				if (DomBB != PrefBB)
				InsertPt = DomBB->getTerminator();
				}

				if (isa<StoreInst>(I) && PtrDiff == 0)
				Writes = true;
				}
				}
				};

	bool LoopDataPrefetch::runOnLoop(Loop *L) {			bool LoopDataPrefetch::runOnLoop(Loop *L) {
	bool MadeChange = false;			bool MadeChange = false;

	// Only prefetch in the inner-most loop			// Only prefetch in the inner-most loop
	if (!L->empty())			if (!L->empty())
	return MadeChange;			return MadeChange;

	SmallPtrSet<const Value *, 32> EphValues;			SmallPtrSet<const Value *, 32> EphValues;
	CodeMetrics::collectEphemeralValues(L, AC, EphValues);			CodeMetrics::collectEphemeralValues(L, AC, EphValues);

	// Calculate the number of iterations ahead to prefetch			// Calculate the number of iterations ahead to prefetch
	CodeMetrics Metrics;			CodeMetrics Metrics;
				bool HasCall = false;
	for (const auto BB : L->blocks()) {			for (const auto BB : L->blocks()) {
	// If the loop already has prefetches, then assume that the user knows			// If the loop already has prefetches, then assume that the user knows
	// what they are doing and don't add any more.			// what they are doing and don't add any more.
	for (auto &I : *BB)			for (auto &I : *BB) {
	if (CallInst *CI = dyn_cast<CallInst>(&I))			if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
	if (Function *F = CI->getCalledFunction())			ImmutableCallSite CS(&I);
				if (const Function *F = CS.getCalledFunction()) {
	if (F->getIntrinsicID() == Intrinsic::prefetch)			if (F->getIntrinsicID() == Intrinsic::prefetch)
	return MadeChange;			return MadeChange;
				if (TTI->isLoweredToCall(F))
				HasCall = true;
				} else { // indirect call.
				HasCall = true;
				}
				}
				}
	Metrics.analyzeBasicBlock(BB, *TTI, EphValues);			Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
	}			}
	unsigned LoopSize = Metrics.NumInsts;			unsigned LoopSize = Metrics.NumInsts;
	if (!LoopSize)			if (!LoopSize)
	LoopSize = 1;			LoopSize = 1;

	unsigned ItersAhead = getPrefetchDistance() / LoopSize;			unsigned ItersAhead = getPrefetchDistance() / LoopSize;
	if (!ItersAhead)			if (!ItersAhead)
	ItersAhead = 1;			ItersAhead = 1;

	if (ItersAhead > getMaxPrefetchIterationsAhead())			if (ItersAhead > getMaxPrefetchIterationsAhead())
	return MadeChange;			return MadeChange;

	LLVM_DEBUG(dbgs() << "Prefetching " << ItersAhead			unsigned ConstantMaxTripCount = SE->getSmallConstantMaxTripCount(L);
	<< " iterations ahead (loop size: " << LoopSize << ") in "			if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
	<< L->getHeader()->getParent()->getName() << ": " << *L);			return MadeChange;

	SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 16> PrefLoads;			unsigned NumMemAccesses = 0;
	for (const auto BB : L->blocks()) {			unsigned NumStridedMemAccesses = 0;
				SmallVector<Prefetch, 16> Prefetches;
				for (const auto BB : L->blocks())
	for (auto &I : *BB) {			for (auto &I : *BB) {
	Value *PtrValue;			Value *PtrValue;
	Instruction *MemI;			Instruction *MemI;

	if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {			if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {
	MemI = LMemI;			MemI = LMemI;
	PtrValue = LMemI->getPointerOperand();			PtrValue = LMemI->getPointerOperand();
	} else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {			} else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {
	if (!PrefetchWrites) continue;			if (!doPrefetchWrites()) continue;
	MemI = SMemI;			MemI = SMemI;
	PtrValue = SMemI->getPointerOperand();			PtrValue = SMemI->getPointerOperand();
	} else continue;			} else continue;

	unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();			unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
	if (PtrAddrSpace)			if (PtrAddrSpace)
	continue;			continue;
				NumMemAccesses++;
	if (L->isLoopInvariant(PtrValue))			if (L->isLoopInvariant(PtrValue))
	continue;			continue;

	const SCEV *LSCEV = SE->getSCEV(PtrValue);			const SCEV *LSCEV = SE->getSCEV(PtrValue);
	const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);			const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
	if (!LSCEVAddRec)			if (!LSCEVAddRec)
	continue;			continue;
				NumStridedMemAccesses++;

	// Check if the stride of the accesses is large enough to warrant a			// We don't want to double prefetch individual cache lines. If this
	// prefetch.			// access is known to be within one cache line of some other one that
	if (!isStrideLargeEnough(LSCEVAddRec))			// has already been prefetched, then don't prefetch this one as well.
	continue;

	// We don't want to double prefetch individual cache lines. If this load
	// is known to be within one cache line of some other load that has
	// already been prefetched, then don't prefetch this one as well.
	bool DupPref = false;			bool DupPref = false;
	for (const auto &PrefLoad : PrefLoads) {			for (auto &Pref : Prefetches) {
	const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, PrefLoad.second);			const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, Pref.LSCEVAddRec);
	if (const SCEVConstant *ConstPtrDiff =			if (const SCEVConstant *ConstPtrDiff =
	dyn_cast<SCEVConstant>(PtrDiff)) {			dyn_cast<SCEVConstant>(PtrDiff)) {
	int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());			int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
	if (PD < (int64_t) TTI->getCacheLineSize()) {			if (PD < (int64_t) TTI->getCacheLineSize()) {
				Pref.addInstruction(MemI, DT, PD);
	DupPref = true;			DupPref = true;
	break;			break;
	}			}
	}			}
	}			}
	if (DupPref)			if (!DupPref)
				Prefetches.push_back(Prefetch(LSCEVAddRec, MemI));
				}

				unsigned TargetMinStride =
				getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
				Prefetches.size(), HasCall);

				LLVM_DEBUG(dbgs() << "Prefetching " << ItersAhead
				<< " iterations ahead (loop size: " << LoopSize << ") in "
				<< L->getHeader()->getParent()->getName() << ": " << *L);
				LLVM_DEBUG(dbgs() << "Loop has: "
				<< NumMemAccesses << " memory accesses, "
				<< NumStridedMemAccesses << " strided memory accesses, "
				<< Prefetches.size() << " potential prefetch(es), "
				<< "a minimum stride of " << TargetMinStride << ", "
				<< (HasCall ? "calls" : "no calls") << ".\n");

				for (auto &P : Prefetches) {
				// Check if the stride of the accesses is large enough to warrant a
				// prefetch.
				if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride))
	continue;			continue;

	const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr(			const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr(
	SE->getConstant(LSCEVAddRec->getType(), ItersAhead),			SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead),
	LSCEVAddRec->getStepRecurrence(*SE)));			P.LSCEVAddRec->getStepRecurrence(*SE)));
	if (!isSafeToExpand(NextLSCEV, *SE))			if (!isSafeToExpand(NextLSCEV, *SE))
	continue;			continue;

	PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec));			BasicBlock *BB = P.InsertPt->getParent();
				Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), 0/*PtrAddrSpace*/);
				SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr");
				Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt);

	Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), PtrAddrSpace);			IRBuilder<> Builder(P.InsertPt);
	SCEVExpander SCEVE(*SE, I.getModule()->getDataLayout(), "prefaddr");
	Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI);

	IRBuilder<> Builder(MemI);
	Module *M = BB->getParent()->getParent();			Module *M = BB->getParent()->getParent();
	Type *I32 = Type::getInt32Ty(BB->getContext());			Type *I32 = Type::getInt32Ty(BB->getContext());
	Function *PrefetchFunc = Intrinsic::getDeclaration(			Function *PrefetchFunc = Intrinsic::getDeclaration(
	M, Intrinsic::prefetch, PrefPtrValue->getType());			M, Intrinsic::prefetch, PrefPtrValue->getType());
	Builder.CreateCall(			Builder.CreateCall(
	PrefetchFunc,			PrefetchFunc,
	{PrefPtrValue,			{PrefPtrValue,
	ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1),			ConstantInt::get(I32, P.Writes),
	ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});			ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
	++NumPrefetches;			++NumPrefetches;
	LLVM_DEBUG(dbgs() << " Access: " << *PtrValue << ", SCEV: " << *LSCEV			LLVM_DEBUG(dbgs() << " Access: "
	<< "\n");			<< *P.MemI->getOperand(isa<LoadInst>(P.MemI) ? 0 : 1)
				<< ", SCEV: " << *P.LSCEVAddRec << "\n");
	ORE->emit([&]() {			ORE->emit([&]() {
	return OptimizationRemark(DEBUG_TYPE, "Prefetched", MemI)			return OptimizationRemark(DEBUG_TYPE, "Prefetched", P.MemI)
	<< "prefetched memory access";			<< "prefetched memory access";
	});			});

	MadeChange = true;			MadeChange = true;
	}			}
	}

	return MadeChange;			return MadeChange;
	}			}

llvm/test/CodeGen/SystemZ/prefetch-02.ll

This file was added.

				; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -prefetch-distance=100 \
				; RUN: -stop-after=loop-data-prefetch | FileCheck %s -check-prefix=FAR-PREFETCH
				; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -prefetch-distance=20 \
				; RUN: -stop-after=loop-data-prefetch | FileCheck %s -check-prefix=NEAR-PREFETCH
				;
				; Check that prefetches are not emitted when the known constant trip count of
				; the loop is smaller than the estimated "iterations ahead" of the prefetch.
				;
				; FAR-PREFETCH-LABEL: fun
				; FAR-PREFETCH-NOT: call void @llvm.prefetch

				; NEAR-PREFETCH-LABEL: fun
				; NEAR-PREFETCH: call void @llvm.prefetch


				define void @fun(i32* nocapture %Src, i32* nocapture readonly %Dst) {
				entry:
				br label %for.body

				for.cond.cleanup: ; preds = %for.body
				ret void

				for.body: ; preds = %for.body, %entry
				%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next.9, %for.body ]
				%arrayidx = getelementptr inbounds i32, i32* %Dst, i64 %indvars.iv
				%0 = load i32, i32* %arrayidx, align 4
				%arrayidx2 = getelementptr inbounds i32, i32* %Src, i64 %indvars.iv
				store i32 %0, i32* %arrayidx2, align 4
				%indvars.iv.next.9 = add nuw nsw i64 %indvars.iv, 1600
				%cmp.9 = icmp ult i64 %indvars.iv.next.9, 11200
				br i1 %cmp.9, label %for.body, label %for.cond.cleanup
				}

llvm/test/CodeGen/SystemZ/prefetch-03.ll

This file was added.

				; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -prefetch-distance=50 \
				; RUN: -loop-prefetch-writes -stop-after=loop-data-prefetch | FileCheck %s
				;
				; Check that prefetches are emitted in a position that is executed each
				; iteration for each targeted memory instruction. The two stores in %true and
				; %false are within one cache line in memory, so they should get a single
				; prefetch in %for.body.
				;
				; CHECK-LABEL: for.body
				; CHECK: call void @llvm.prefetch.p0i8(i8* {{.*}}, i32 0
				; CHECK: call void @llvm.prefetch.p0i8(i8* {{.*}}, i32 1
				; CHECK-LABEL: true
				; CHECK-LABEL: false
				; CHECK-LABEL: latch

				define void @fun(i32* nocapture %Src, i32* nocapture readonly %Dst) {
				entry:
				br label %for.body

				for.body:
				%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next.9, %latch ]
				%arrayidx = getelementptr inbounds i32, i32* %Dst, i64 %indvars.iv
				%0 = load i32, i32* %arrayidx, align 4
				%cmp = icmp sgt i32 %0, 0
				br i1 %cmp, label %true, label %false

				true:
				%arrayidx2 = getelementptr inbounds i32, i32* %Src, i64 %indvars.iv
				store i32 %0, i32* %arrayidx2, align 4
				br label %latch

				false:
				%a = add i64 %indvars.iv, 8
				%arrayidx3 = getelementptr inbounds i32, i32* %Src, i64 %a
				store i32 %0, i32* %arrayidx3, align 4
				br label %latch

				latch:
				%indvars.iv.next.9 = add nuw nsw i64 %indvars.iv, 1600
				%cmp.9 = icmp ult i64 %indvars.iv.next.9, 11200
				br i1 %cmp.9, label %for.body, label %for.cond.cleanup

				for.cond.cleanup:
				ret void
				}

llvm/test/CodeGen/SystemZ/prefetch-04.ll

This file was added.

				; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -prefetch-distance=20 \
				; RUN: -loop-prefetch-writes -stop-after=loop-data-prefetch | FileCheck %s
				;
				; Check that a load followed by a store to the same address gets a single
				; write prefetch.
				;
				; CHECK-LABEL: for.body
				; CHECK: call void @llvm.prefetch.p0i8(i8* %scevgep{{.*}}, i32 1, i32 3, i32 1
				; CHECK-NOT: call void @llvm.prefetch

				define void @fun(i32* nocapture %Src, i32* nocapture readonly %Dst) {
				entry:
				br label %for.body

				for.body:
				%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next.9, %for.body ]
				%arrayidx = getelementptr inbounds i32, i32* %Dst, i64 %indvars.iv
				%0 = load i32, i32* %arrayidx, align 4
				%a = add i32 %0, 128
				store i32 %a, i32* %arrayidx, align 4
				%indvars.iv.next.9 = add nuw nsw i64 %indvars.iv, 1600
				%cmp.9 = icmp ult i64 %indvars.iv.next.9, 11200
				br i1 %cmp.9, label %for.body, label %for.cond.cleanup

				for.cond.cleanup:
				ret void
				}

This is an archive of the discontinued LLVM Phabricator instance.

[LoopDataPrefetch + SystemZ] Let target decide on prefetching on a per loop basis
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 254502

llvm/include/llvm/Analysis/TargetTransformInfo.h

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

llvm/include/llvm/CodeGen/BasicTTIImpl.h

llvm/include/llvm/MC/MCSubtargetInfo.h

llvm/lib/Analysis/TargetTransformInfo.cpp

llvm/lib/MC/MCSubtargetInfo.cpp

llvm/lib/Target/AArch64/AArch64Subtarget.h

llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h

llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp

llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp

llvm/test/CodeGen/SystemZ/prefetch-02.ll

llvm/test/CodeGen/SystemZ/prefetch-03.ll

llvm/test/CodeGen/SystemZ/prefetch-04.ll

This is an archive of the discontinued LLVM Phabricator instance.

[LoopDataPrefetch + SystemZ] Let target decide on prefetching on a per loop basis
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 254502

llvm/include/llvm/Analysis/TargetTransformInfo.h

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

llvm/include/llvm/CodeGen/BasicTTIImpl.h

llvm/include/llvm/MC/MCSubtargetInfo.h

llvm/lib/Analysis/TargetTransformInfo.cpp

llvm/lib/MC/MCSubtargetInfo.cpp

llvm/lib/Target/AArch64/AArch64Subtarget.h

llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h

llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp

llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp

llvm/test/CodeGen/SystemZ/prefetch-02.ll

llvm/test/CodeGen/SystemZ/prefetch-03.ll

llvm/test/CodeGen/SystemZ/prefetch-04.ll

[LoopDataPrefetch + SystemZ] Let target decide on prefetching on a per loop basis
ClosedPublic