Diff 59767

include/llvm/Analysis/TargetTransformInfo.h

Show First 20 Lines • Show All 453 Lines • ▼ Show 20 Lines	public:
/// performed.		/// performed.
unsigned getMaxPrefetchIterationsAhead() const;		unsigned getMaxPrefetchIterationsAhead() const;

/// \return The maximum interleave factor that any transform should try to		/// \return The maximum interleave factor that any transform should try to
/// perform for this target. This number depends on the level of parallelism		/// perform for this target. This number depends on the level of parallelism
/// and the number of execution units in the CPU.		/// and the number of execution units in the CPU.
unsigned getMaxInterleaveFactor(unsigned VF) const;		unsigned getMaxInterleaveFactor(unsigned VF) const;

		/// \return The maximum number of store operations permitted to replace a
		/// call to llvm.memset.
		unsigned getMaxStoresPerMemset(Function &F) const;

		qcolombetUnsubmitted Not Done Reply Inline Actions What about the MinSize attribute? (Oz) Shouldn’t we just pass the Function (or its directly its list of attribute) and let the target checks whatever attribute it feels appropriate? qcolombet: What about the MinSize attribute? (Oz) Shouldn’t we just pass the Function (or its directly its…
		qcolombetUnsubmitted Not Done Reply Inline Actions Is nullptr permitted for F? If not, then use a reference. qcolombet: Is nullptr permitted for F? If not, then use a reference.
/// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.		/// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
int getArithmeticInstrCost(		int getArithmeticInstrCost(
unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,		unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
OperandValueKind Opd2Info = OK_AnyValue,		OperandValueKind Opd2Info = OK_AnyValue,
OperandValueProperties Opd1PropInfo = OP_None,		OperandValueProperties Opd1PropInfo = OP_None,
OperandValueProperties Opd2PropInfo = OP_None) const;		OperandValueProperties Opd2PropInfo = OP_None) const;

/// \return The cost of a shuffle instruction of kind Kind and of type Tp.		/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
▲ Show 20 Lines • Show All 184 Lines • ▼ Show 20 Lines	virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) = 0;		Type *Ty) = 0;
virtual unsigned getNumberOfRegisters(bool Vector) = 0;		virtual unsigned getNumberOfRegisters(bool Vector) = 0;
virtual unsigned getRegisterBitWidth(bool Vector) = 0;		virtual unsigned getRegisterBitWidth(bool Vector) = 0;
virtual unsigned getCacheLineSize() = 0;		virtual unsigned getCacheLineSize() = 0;
virtual unsigned getPrefetchDistance() = 0;		virtual unsigned getPrefetchDistance() = 0;
virtual unsigned getMinPrefetchStride() = 0;		virtual unsigned getMinPrefetchStride() = 0;
virtual unsigned getMaxPrefetchIterationsAhead() = 0;		virtual unsigned getMaxPrefetchIterationsAhead() = 0;
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;		virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
		virtual unsigned getMaxStoresPerMemset(Function &F) = 0;
virtual unsigned		virtual unsigned
getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,		getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
OperandValueKind Opd2Info,		OperandValueKind Opd2Info,
OperandValueProperties Opd1PropInfo,		OperandValueProperties Opd1PropInfo,
OperandValueProperties Opd2PropInfo) = 0;		OperandValueProperties Opd2PropInfo) = 0;
virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,		virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) = 0;		Type *SubTp) = 0;
virtual int getCastInstrCost(unsigned Opcode, Type Dst, Type Src) = 0;		virtual int getCastInstrCost(unsigned Opcode, Type Dst, Type Src) = 0;
▲ Show 20 Lines • Show All 172 Lines • ▼ Show 20 Lines	unsigned getMinPrefetchStride() override {
return Impl.getMinPrefetchStride();		return Impl.getMinPrefetchStride();
}		}
unsigned getMaxPrefetchIterationsAhead() override {		unsigned getMaxPrefetchIterationsAhead() override {
return Impl.getMaxPrefetchIterationsAhead();		return Impl.getMaxPrefetchIterationsAhead();
}		}
unsigned getMaxInterleaveFactor(unsigned VF) override {		unsigned getMaxInterleaveFactor(unsigned VF) override {
return Impl.getMaxInterleaveFactor(VF);		return Impl.getMaxInterleaveFactor(VF);
}		}
		// Forwards the memset store-limit query for F to the wrapped Impl object.
		unsigned getMaxStoresPerMemset(Function &F) override {
		return Impl.getMaxStoresPerMemset(F);
		}
unsigned		unsigned
getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,		getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
OperandValueKind Opd2Info,		OperandValueKind Opd2Info,
OperandValueProperties Opd1PropInfo,		OperandValueProperties Opd1PropInfo,
OperandValueProperties Opd2PropInfo) override {		OperandValueProperties Opd2PropInfo) override {
return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,		return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
Opd1PropInfo, Opd2PropInfo);		Opd1PropInfo, Opd2PropInfo);
}		}
▲ Show 20 Lines • Show All 180 Lines • Show Last 20 Lines

include/llvm/Analysis/TargetTransformInfoImpl.h

Show First 20 Lines • Show All 272 Lines • ▼ Show 20 Lines	public:
unsigned getPrefetchDistance() { return 0; }		unsigned getPrefetchDistance() { return 0; }

unsigned getMinPrefetchStride() { return 1; }		unsigned getMinPrefetchStride() { return 1; }

unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }		unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }

unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }		unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

		// Default implementation: report a limit of 0 stores regardless of F. A
		// caller that compares a positive expected store count against this limit
		// (as DSE's isSplittingProfitable does) will therefore treat the memset as
		// too large to be expanded into stores.
		unsigned getMaxStoresPerMemset(Function &F) { return 0; }

unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,		unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info,		TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info,		TTI::OperandValueKind Opd2Info,
TTI::OperandValueProperties Opd1PropInfo,		TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {		TTI::OperandValueProperties Opd2PropInfo) {
return 1;		return 1;
}		}

▲ Show 20 Lines • Show All 241 Lines • Show Last 20 Lines

include/llvm/CodeGen/BasicTTIImpl.h

Show First 20 Lines • Show All 278 Lines • ▼ Show 20 Lines	public:
/// @{		/// @{

unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }		unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }

unsigned getRegisterBitWidth(bool Vector) { return 32; }		unsigned getRegisterBitWidth(bool Vector) { return 32; }

unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }		unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

		/// \returns the maximum number of stores that may replace a call to
		/// llvm.memset in \p F, as reported by the target lowering info. The
		/// size-optimized limit is requested when \p F is marked as optimizing
		/// for size or for minimum size.
		unsigned getMaxStoresPerMemset(Function &F) {
		  // Consult both size queries so that either attribute selects the
		  // size-optimized limit; testing optForMinSize() alone would ignore
		  // functions that are only marked optsize.
		  const bool OptSize = F.optForSize() || F.optForMinSize();
		  return getTLI()->getMaxStoresPerMemset(OptSize);
		}

unsigned getArithmeticInstrCost(		unsigned getArithmeticInstrCost(
unsigned Opcode, Type *Ty,		unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,		TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,		TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,		TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None) {		TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None) {
// Check if any of the operands are vector operands.		// Check if any of the operands are vector operands.
const TargetLoweringBase *TLI = getTLI();		const TargetLoweringBase *TLI = getTLI();
▲ Show 20 Lines • Show All 595 Lines • Show Last 20 Lines

lib/Analysis/TargetTransformInfo.cpp

	Show First 20 Lines • Show All 237 Lines • ▼ Show 20 Lines
	unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {			unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
	return TTIImpl->getMaxPrefetchIterationsAhead();			return TTIImpl->getMaxPrefetchIterationsAhead();
	}			}

	unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {			unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
	return TTIImpl->getMaxInterleaveFactor(VF);			return TTIImpl->getMaxInterleaveFactor(VF);
	}			}

				// Forwards the memset store-limit query for F to TTIImpl.
				unsigned TargetTransformInfo::getMaxStoresPerMemset(Function &F) const {
				return TTIImpl->getMaxStoresPerMemset(F);
				}

	int TargetTransformInfo::getArithmeticInstrCost(			int TargetTransformInfo::getArithmeticInstrCost(
	unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,			unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
	OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,			OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
	OperandValueProperties Opd2PropInfo) const {			OperandValueProperties Opd2PropInfo) const {
	int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,			int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
	Opd1PropInfo, Opd2PropInfo);			Opd1PropInfo, Opd2PropInfo);
	assert(Cost >= 0 && "TTI should not produce negative costs!");			assert(Cost >= 0 && "TTI should not produce negative costs!");
	return Cost;			return Cost;
	▲ Show 20 Lines • Show All 187 Lines • Show Last 20 Lines

lib/Transforms/Scalar/DeadStoreElimination.cpp

Show All 19 Lines
#include "llvm/ADT/SetVector.h"		#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"		#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"		#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"		#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"		#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemoryBuiltins.h"		#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"		#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"		#include "llvm/Analysis/TargetLibraryInfo.h"
		#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"		#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"		#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"		#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"		#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"		#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"		#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"		#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"		#include "llvm/IR/IntrinsicInst.h"
▲ Show 20 Lines • Show All 167 Lines • ▼ Show 20 Lines	static bool isRemovable(Instruction *I) {
}		}

if (auto CS = CallSite(I))		if (auto CS = CallSite(I))
return CS.getInstruction()->use_empty();		return CS.getInstruction()->use_empty();

return false;		return false;
}		}

		/// Estimate how many stores are needed to write \p SizeInBytes bytes when
		/// the widest single store covers \p MaxIntSizeInBytes bytes.
		///
		/// The estimate is deliberately pessimistic: every full-width chunk counts
		/// as one store and every leftover byte counts as its own store.
		/// \p MaxIntSizeInBytes must be non-zero.
		static unsigned getExpectedNumStores(unsigned SizeInBytes,
		                                     unsigned MaxIntSizeInBytes) {
		  // Lacking precise target knowledge, treat the largest legal integer
		  // width as the widest store the target can issue.
		  const unsigned WideCount = SizeInBytes / MaxIntSizeInBytes;
		  // Whatever is not covered by full-width stores is assumed to be
		  // written one byte at a time.
		  const unsigned LeftoverBytes = SizeInBytes - WideCount * MaxIntSizeInBytes;
		  return WideCount + LeftoverBytes;
		}

		/// Return true if splitting memset into two parts is profitable in terms of
		/// the number of stores when lowered from the memset.
		static bool isSplittingProfitable(Instruction *EarlierStore,
		int64_t NewLengthPart1,
		int64_t NewLengthPart2, const DataLayout &DL,
		const TargetTransformInfo *TTI) {
		// FIXME: Split only memset for now. Supporting memcpy/memmove is also
		// possible.
		MemSetInst *MSI = dyn_cast<MemSetInst>(EarlierStore);
		if (!MSI)
		return false;

		unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
		// Return false if we don't have information about the legal integer size.
		if (MaxIntSize == 0)
		return false;

		dberlinUnsubmitted Not Done Reply Inline Actions How and when can this happen? dberlin: How and when can this happen?
		junbumlAuthorUnsubmitted Not Done Reply Inline Actions If there is no information about LegalIntWidth extracted from datalayout, this could be 0. If we remove the target datalayout in input IR, this will be 0. junbuml: If there is no information about LegalIntWidth extracted from datalayout, this could be 0. If…
		qcolombetUnsubmitted Not Done Reply Inline Actions Like Daniel said, DL are mandatory now. If that may still happen, (int would be an illegal type??) add a comment on when this is the case and add a test case for that! qcolombet: Like Daniel said, DL are mandatory now. If that may still happen, (int would be an illegal type?
		junbumlAuthorUnsubmitted Not Done Reply Inline Actions As far as I check, I didn't encounter any complain when injecting an IR without "target datalayout" into opt. Also there are many test cases without datalayout. It seems also possible to assign empty string like target datalayout="". Did the mandatory mean in IRs generated from the frontend? If DL.getLargestLegalIntTypeSizeInBits() returns 0, I think we should bail out with the assumption that we don't have proper information about backend. junbuml: As far as I check, I didn't encounter any complain when injecting an IR without "target…
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions Having `DL.getLargestLegalIntTypeSizeInBits()` returning 0 would mean that someone specifically asked for not having int types supported at all. Handling this case means that LLVM acknowledges that it aims at supporting this case. Is it possible/desirable to support this? Otherwise an assertion would be more appropriate IMO. mehdi_amini: Having ` DL.getLargestLegalIntTypeSizeInBits()` returning 0 would mean that someone…
		unsigned OrigLength = cast<ConstantInt>(MSI->getLength())->getZExtValue();
		// Return false if the memset size is equal to the largest integer size
		// because it's going to be only one store, or if the memset size is
		qcolombetUnsubmitted Not Done Reply Inline Actions Add a comment why this is not profitable. Something along the line of “this is going to be only one store”. qcolombet: Add a comment why this is not profitable. Something along the line of “this is going to be only…
		// smaller than the largest integer size because in this case, the total
		// number of narrow stores could be different depending on the backend.
		if (OrigLength <= MaxIntSize)
		return false;

		// Check if the existing memset is small enough to be lowered to stores later.
		unsigned NumStoresInOrigMemset = getExpectedNumStores(OrigLength, MaxIntSize);
		if (TTI->getMaxStoresPerMemset(*(MSI->getParent()->getParent())) <
		NumStoresInOrigMemset)
		return false;

		// Make sure that each part is at least as large as the largest integer size.
		if (NewLengthPart1 < MaxIntSize \|\| NewLengthPart2 < MaxIntSize)
		return false;

		// The expected number of stores after splitting should be less than the
		// expected number of stores from the original memset.
		return NumStoresInOrigMemset >
		(getExpectedNumStores(NewLengthPart1, MaxIntSize) +
		getExpectedNumStores(NewLengthPart2, MaxIntSize));
		}

		/// Try to split the memset \p EarlierStore into two memsets that write only
		/// the bytes not covered by a later store landing in its middle:
		///
		///   |--------earlier--------|
		///          |-- later --|
		///   |part1-|           |part2|
		///
		/// \param EarlierStore  the earlier write; only memsets are split (see
		///                      isSplittingProfitable).
		/// \param LaterOffset   offset of the later write, as computed by the caller.
		/// \param EarlierOffset offset of the earlier write, as computed by the
		///                      caller.
		/// \param EarlierLoc    location of the earlier write; supplies its size.
		/// \param LaterLoc      location of the later write; supplies its size.
		/// \param DL            data layout used by the profitability check.
		/// \param TTI           target info used by the profitability check.
		/// \returns true if \p EarlierStore was shortened and a second memset was
		/// inserted after it; false if nothing was changed.
		static bool tryToSplitStore(Instruction *EarlierStore, int64_t LaterOffset,
		                            int64_t EarlierOffset, MemoryLocation &EarlierLoc,
		                            MemoryLocation &LaterLoc, const DataLayout &DL,
		                            const TargetTransformInfo *TTI) {
		  // Bytes of the earlier write before the later write starts ...
		  int64_t NewLengthPart1 = LaterOffset - EarlierOffset;
		  // ... and bytes of the earlier write after the later write ends.
		  int64_t NewLengthPart2 = int64_t(EarlierOffset + EarlierLoc.Size) -
		                           int64_t(LaterOffset + LaterLoc.Size);

		  if (!isSplittingProfitable(EarlierStore, NewLengthPart1, NewLengthPart2, DL,
		                             TTI))
		    return false;

		  MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(EarlierStore);
		  unsigned DepWriteAlign = DepIntrinsic->getAlignment();
		  // Offset at which the second part begins (just past the later write).
		  int64_t NewWriteOffset = int64_t(LaterOffset + LaterLoc.Size);

		  // Only split when the second part's start offset is compatible with the
		  // alignment recorded on the original intrinsic, since the clone keeps it.
		  if (!((llvm::isPowerOf2_64(NewWriteOffset) &&
		         DepWriteAlign <= NewWriteOffset) ||
		        ((DepWriteAlign != 0) && NewWriteOffset % DepWriteAlign == 0)))
		    return false;

		  DEBUG(dbgs() << "DSE: Split MemIntrinsic :\n " << *EarlierStore << "\n");
		  DEBUG(dbgs() << "into :\n");

		  Value *DepWriteLength = DepIntrinsic->getLength();
		  Value *NewLengthPart1Val =
		      ConstantInt::get(DepWriteLength->getType(), NewLengthPart1);
		  Value *NewLengthPart2Val =
		      ConstantInt::get(DepWriteLength->getType(), NewLengthPart2);

		  // The original intrinsic becomes the first part by shrinking its length.
		  DepIntrinsic->setLength(NewLengthPart1Val);
		  DEBUG(dbgs() << " " << *DepIntrinsic << "\n");

		  // The second part is a clone writing through a GEP that skips past the
		  // later write.
		  int64_t OffsetMoved = NewWriteOffset - EarlierOffset;
		  Value *Indices[1] = {
		      ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};
		  GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
		      DepIntrinsic->getRawDest(), Indices, "", EarlierStore);
		  // isSplittingProfitable only accepts memsets, so the clone must be a
		  // MemSetInst. Use cast<> to assert that, rather than dyn_cast<> whose
		  // possibly-null result would be dereferenced unchecked.
		  MemSetInst *DepIntrinsic2 = cast<MemSetInst>(DepIntrinsic->clone());
		  DepIntrinsic2->setDest(NewDestGEP);
		  DepIntrinsic2->setLength(NewLengthPart2Val);
		  DepIntrinsic2->insertAfter(DepIntrinsic);
		  DEBUG(dbgs() << " " << *DepIntrinsic2 << "\n");
		  return true;
		}

/// Returns true if the end of this instruction can be safely shortened in		/// Returns true if the end of this instruction can be safely shortened in
/// length.		/// length.
static bool isShortenableAtTheEnd(Instruction *I) {		static bool isShortenableAtTheEnd(Instruction *I) {
// Don't shorten stores for now		// Don't shorten stores for now
if (isa<StoreInst>(I))		if (isa<StoreInst>(I))
return false;		return false;

▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines	static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
return MemoryLocation::UnknownSize;		return MemoryLocation::UnknownSize;
}		}

namespace {		namespace {
enum OverwriteResult {		enum OverwriteResult {
OverwriteBegin,		OverwriteBegin,
OverwriteComplete,		OverwriteComplete,
OverwriteEnd,		OverwriteEnd,
		OverwritePartial,
OverwriteUnknown		OverwriteUnknown
};		};
}		}

/// Return 'OverwriteComplete' if a store to the 'Later' location completely		/// Return 'OverwriteComplete' if a store to the 'Later' location completely
/// overwrites a store to the 'Earlier' location, 'OverwriteEnd' if the end of		/// overwrites a store to the 'Earlier' location, 'OverwriteEnd' if the end of
/// the 'Earlier' location is completely overwritten by 'Later',		/// the 'Earlier' location is completely overwritten by 'Later',
/// 'OverwriteBegin' if the beginning of the 'Earlier' location is overwritten		/// 'OverwriteBegin' if the beginning of the 'Earlier' location is overwritten
▲ Show 20 Lines • Show All 83 Lines • ▼ Show 20 Lines	static OverwriteResult isOverwrite(const MemoryLocation &Later,
//		//
// In this case we may want to trim the size of earlier to avoid generating		// In this case we may want to trim the size of earlier to avoid generating
// writes to addresses which will definitely be overwritten later		// writes to addresses which will definitely be overwritten later
if (LaterOff > EarlierOff &&		if (LaterOff > EarlierOff &&
LaterOff < int64_t(EarlierOff + Earlier.Size) &&		LaterOff < int64_t(EarlierOff + Earlier.Size) &&
int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))		int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
return OverwriteEnd;		return OverwriteEnd;

// Finally, we also need to check if the later store overwrites the beginning		// We also need to check if the later store overwrites the beginning of the
// of the earlier store.		// earlier store.
//		//
// \|--earlier--\|		// \|--earlier--\|
// \|-- later --\|		// \|-- later --\|
//		//
// In this case we may want to move the destination address and trim the size		// In this case we may want to move the destination address and trim the size
// of earlier to avoid generating writes to addresses which will definitely		// of earlier to avoid generating writes to addresses which will definitely
// be overwritten later.		// be overwritten later.
if (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff) {		if (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff) {
assert (int64_t(LaterOff + Later.Size) < int64_t(EarlierOff + Earlier.Size)		assert (int64_t(LaterOff + Later.Size) < int64_t(EarlierOff + Earlier.Size)
&& "Expect to be handled as OverwriteComplete" );		&& "Expect to be handled as OverwriteComplete" );
return OverwriteBegin;		return OverwriteBegin;
}		}

		// Finally, we need to check if the later store partially overwrites the
		qcolombetUnsubmitted Not Done Reply Inline Actions Add a comment with ASCII art like the other cases to explain what we handle here. qcolombet: Add a comment with ASCII art like the other cases to explain what we handle here.
		// earlier in the middle.
		// \|------earlier------\|
		// \|-- later --\|
		//
		// In this case we may want to split the earlier just to write to addresses
		// which are not covered by later.
		if (LaterOff > EarlierOff &&
		int64_t(LaterOff + Later.Size) < int64_t(EarlierOff + Earlier.Size)) {
		return OverwritePartial;
		}

// Otherwise, they don't completely overlap.		// Otherwise, they don't completely overlap.
return OverwriteUnknown;		return OverwriteUnknown;
}		}

/// If 'Inst' might be a self read (i.e. a noop copy of a		/// If 'Inst' might be a self read (i.e. a noop copy of a
/// memory region into an identical pointer) then it doesn't actually make its		/// memory region into an identical pointer) then it doesn't actually make its
/// input dead in the traditional sense. Consider this case:		/// input dead in the traditional sense. Consider this case:
///		///
▲ Show 20 Lines • Show All 343 Lines • ▼ Show 20 Lines	if (DeadStackObjects.empty())
break;		break;
}		}

return MadeChange;		return MadeChange;
}		}

static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,		static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
MemoryDependenceResults MD, DominatorTree DT,		MemoryDependenceResults MD, DominatorTree DT,
const TargetLibraryInfo *TLI) {		const TargetLibraryInfo *TLI,
		const TargetTransformInfo *TTI) {
const DataLayout &DL = BB.getModule()->getDataLayout();		const DataLayout &DL = BB.getModule()->getDataLayout();
bool MadeChange = false;		bool MadeChange = false;

// Do a top-down walk on the BB.		// Do a top-down walk on the BB.
for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {		for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
Instruction Inst = &BBI++;		Instruction Inst = &BBI++;

// Handle 'free' calls specially.		// Handle 'free' calls specially.
▲ Show 20 Lines • Show All 149 Lines • ▼ Show 20 Lines	while (InstDep.isDef() \|\| InstDep.isClobber()) {
Value *Indices[1] = {		Value *Indices[1] = {
ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};		ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};
GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(		GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
DepIntrinsic->getRawDest(), Indices, "", DepWrite);		DepIntrinsic->getRawDest(), Indices, "", DepWrite);
DepIntrinsic->setDest(NewDestGEP);		DepIntrinsic->setDest(NewDestGEP);
}		}
MadeChange = true;		MadeChange = true;
}		}
		} else if (OR == OverwritePartial) {
		if (tryToSplitStore(DepWrite, InstWriteOffset, DepWriteOffset, DepLoc,
		Loc, DL, TTI))
		MadeChange = true;
}		}
}		}

// If this is a may-aliased store that is clobbering the store value, we		// If this is a may-aliased store that is clobbering the store value, we
// can keep searching past it for another must-aliased pointer that stores		// can keep searching past it for another must-aliased pointer that stores
// to the same location. For example, in:		// to the same location. For example, in:
// store -> P		// store -> P
// store -> Q		// store -> Q
Show All 16 Lines	static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
if (BB.getTerminator()->getNumSuccessors() == 0)		if (BB.getTerminator()->getNumSuccessors() == 0)
MadeChange \|= handleEndBlock(BB, AA, MD, TLI);		MadeChange \|= handleEndBlock(BB, AA, MD, TLI);

return MadeChange;		return MadeChange;
}		}

static bool eliminateDeadStores(Function &F, AliasAnalysis *AA,		static bool eliminateDeadStores(Function &F, AliasAnalysis *AA,
MemoryDependenceResults MD, DominatorTree DT,		MemoryDependenceResults MD, DominatorTree DT,
const TargetLibraryInfo *TLI) {		const TargetLibraryInfo *TLI,
		const TargetTransformInfo *TTI) {
bool MadeChange = false;		bool MadeChange = false;
for (BasicBlock &BB : F)		for (BasicBlock &BB : F)
		qcolombetUnsubmitted Not Done Reply Inline Actions Make a method out of this to break the nested indentation. qcolombet: Make a method out of this to break the nested indentation.
// Only check non-dead blocks. Dead blocks may have strange pointer		// Only check non-dead blocks. Dead blocks may have strange pointer
// cycles that will confuse alias analysis.		// cycles that will confuse alias analysis.
if (DT->isReachableFromEntry(&BB))		if (DT->isReachableFromEntry(&BB))
MadeChange \|= eliminateDeadStores(BB, AA, MD, DT, TLI);		MadeChange \|= eliminateDeadStores(BB, AA, MD, DT, TLI, TTI);
return MadeChange;		return MadeChange;
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// DSE Pass		// DSE Pass
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {		PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
AliasAnalysis *AA = &AM.getResult<AAManager>(F);		AliasAnalysis *AA = &AM.getResult<AAManager>(F);
DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);		DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
MemoryDependenceResults *MD = &AM.getResult<MemoryDependenceAnalysis>(F);		MemoryDependenceResults *MD = &AM.getResult<MemoryDependenceAnalysis>(F);
const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);		const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
		const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);

if (!eliminateDeadStores(F, AA, MD, DT, TLI))		if (!eliminateDeadStores(F, AA, MD, DT, TLI, TTI))
return PreservedAnalyses::all();		return PreservedAnalyses::all();
PreservedAnalyses PA;		PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();		PA.preserve<DominatorTreeAnalysis>();
PA.preserve<GlobalsAA>();		PA.preserve<GlobalsAA>();
PA.preserve<MemoryDependenceAnalysis>();		PA.preserve<MemoryDependenceAnalysis>();
return PA;		return PA;
}		}

Show All 9 Lines	if (skipFunction(F))
return false;		return false;

DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();		DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();		AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
MemoryDependenceResults *MD =		MemoryDependenceResults *MD =
&getAnalysis<MemoryDependenceWrapperPass>().getMemDep();		&getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
const TargetLibraryInfo *TLI =		const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();		&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
		const TargetTransformInfo *TTI =
		&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

return eliminateDeadStores(F, AA, MD, DT, TLI);		return eliminateDeadStores(F, AA, MD, DT, TLI, TTI);
}		}

void getAnalysisUsage(AnalysisUsage &AU) const override {		void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();		AU.setPreservesCFG();
AU.addRequired<DominatorTreeWrapperPass>();		AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();		AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<MemoryDependenceWrapperPass>();		AU.addRequired<MemoryDependenceWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();		AU.addRequired<TargetLibraryInfoWrapperPass>();
		AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();		AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();		AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<MemoryDependenceWrapperPass>();		AU.addPreserved<MemoryDependenceWrapperPass>();
}		}

static char ID; // Pass identification, replacement for typeid		static char ID; // Pass identification, replacement for typeid
};		};

char DSELegacyPass::ID = 0;		char DSELegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,		INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
false)		false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)		INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)		INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)		INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)		INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)		INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
		INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,		INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
false)		false)

FunctionPass *llvm::createDeadStoreEliminationPass() {		FunctionPass *llvm::createDeadStoreEliminationPass() {
return new DSELegacyPass();		return new DSELegacyPass();
}		}

test/CodeGen/AArch64/aarch64-small-memset-lowering.ll

This file was added.

				; RUN: llc -mtriple=arm64-linux-gnu < %s \| FileCheck %s

				target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
				target triple = "aarch64--linux-gnu"

				define void @test_32_8to15(i64* nocapture %P, i64 %n64) {
				; CHECK-LABEL: test_32_8to15
				; CHECK: stp xzr, xzr, [x0, #16]
				; CHECK: stp xzr, x1, [x0]
				entry:
				%Base = bitcast i64* %P to i8*
				%Base2 = getelementptr inbounds i8, i8* %Base, i64 16
				call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 8, i32 8, i1 false)
				call void @llvm.memset.p0i8.i64(i8* %Base2, i8 0, i64 16, i32 8, i1 false)
				%arrayidx1 = getelementptr inbounds i64, i64* %P, i64 1
				store i64 %n64, i64* %arrayidx1
				ret void
				}

				define void @test_32_8to23(i64* nocapture %P, i64 %n64) {
				; CHECK-LABEL: test_32_8to23
				; CHECK: stp [[REG:x[0-9]+]], xzr, [x0, #16]
				; CHECK: stp xzr, [[REG]], [x0]
				entry:
				%Base = bitcast i64* %P to i8*
				%Base2 = getelementptr inbounds i8, i8* %Base, i64 24
				call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 8, i32 8, i1 false)
				call void @llvm.memset.p0i8.i64(i8* %Base2, i8 0, i64 8, i32 8, i1 false)
				%arrayidx2 = getelementptr inbounds i8, i8* %Base, i64 8
				call void @llvm.memset.p0i8.i64(i8* %arrayidx2, i8 1, i64 16, i32 8, i1 false)
				ret void
				}

				declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)

test/Transforms/DeadStoreElimination/SplitMemintrinsic.ll

This file was added.

				; RUN: opt < %s -basicaa -dse -S \| FileCheck %s

				qcolombetUnsubmitted Not Done Reply Inline Actions I would prefer two different tests: 1. One that checks that dse does what is expected. 2. One that checks that given such input (statically available, not produced by opt), the backend does what is expected. qcolombet: I would prefer two different tests: 1. One that checks that dse does what is expected. 2. One…
				target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
				target triple = "aarch64--linux-gnu"

				define void @test_32_8to15(i64* nocapture %P, i64 %n64) {
				; CHECK: [[BASE2:%[0-9]+]] = getelementptr inbounds i8, i8* %Base, i64 16
				; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 8, i32 8, i1 false)
				qcolombetUnsubmitted Not Done Reply Inline Actions Don’t hard code %0. Use a regexp. qcolombet: Don’t hard code %0. Use a regexp.
				; CHECK: call void @llvm.memset.p0i8.i64(i8* [[BASE2]], i8 0, i64 16, i32 8, i1 false)
				entry:
				%Base = bitcast i64* %P to i8*
				dberlinUnsubmitted Not Done Reply Inline Actions Errr, so what gets eliminated here? dberlin: Errr, so what gets eliminated here?
				call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 32, i32 8, i1 false)
				%arrayidx1 = getelementptr inbounds i64, i64* %P, i64 1
				store i64 %n64, i64* %arrayidx1
				ret void
				}

				define void @test_32_8to23(i64* nocapture %P, i64 %n64) {
				; CHECK-LABEL: test_32_8to23
				; CHECK: [[BASE2:%[0-9]+]] = getelementptr inbounds i8, i8* %Base, i64 24
				; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 8, i32 8, i1 false)
				; CHECK: call void @llvm.memset.p0i8.i64(i8* [[BASE2]], i8 0, i64 8, i32 8, i1 false)
				entry:
				%Base = bitcast i64* %P to i8*
				call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 32, i32 8, i1 false)
				%arrayidx2 = getelementptr inbounds i8, i8* %Base, i64 8
				call void @llvm.memset.p0i8.i64(i8* %arrayidx2, i8 1, i64 16, i32 8, i1 false)
				ret void
				}

				define void @test_32_4x8(i64* nocapture %P, i64 %n64) {
				; The memset should be completely removed by later overlapped stores.
				; CHECK-LABEL: @test_32_4x8(
				; CHECK-NOT: call void @llvm.memset.p0i8.i64
				entry:
				dberlinUnsubmitted Not Done Reply Inline Actions Again, if your goal is to test that perform dead store elimination, you should test that, not just test that we turn the memset into stores. dberlin: Again, if your goal is to test that perform dead store elimination, you should test that, not…
				%Base = bitcast i64* %P to i8*
				call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 32, i32 8, i1 false)
				; P[2]
				%arrayidx2 = getelementptr inbounds i64, i64* %P, i64 2
				store i64 %n64, i64* %arrayidx2
				; P[3]
				%arrayidx3 = getelementptr inbounds i64, i64* %P, i64 3
				store i64 %n64, i64* %arrayidx3
				; P[1]
				%arrayidx1 = getelementptr inbounds i64, i64* %P, i64 1
				store i64 %n64, i64* %arrayidx1
				; P[0]
				%arrayidx0 = getelementptr inbounds i64, i64* %P, i64 0
				store i64 %n64, i64* %arrayidx0
				ret void
				}

				define void @test_32_8to11(i64* nocapture %P, i32 %n32) {
				; Splitting memset may be unprofitable as the later store could cause
				; fractions if split.
				; CHECK-LABEL: test_32_8to11
				; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 32, i32 8, i1 false)
				entry:
				%Base = bitcast i64* %P to i8*
				call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 32, i32 8, i1 false)
				%arrayidx1 = getelementptr inbounds i64, i64* %P, i64 1
				%arrayidx1_32 = bitcast i64* %arrayidx1 to i32*
				store i32 %n32, i32* %arrayidx1_32
				ret void
				}

				define void @test_34_8to9(i64* nocapture %P, i16 %n16) {
				; This should not be split due to the alignment in the 2nd memset if split.
				; CHECK-LABEL: test_34_8to9
				; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 34, i32 8, i1 false)
				entry:
				%Base = bitcast i64* %P to i8*
				call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 34, i32 8, i1 false)
				%arrayidx1 = getelementptr inbounds i64, i64* %P, i64 1
				%arrayidx1_16 = bitcast i64* %arrayidx1 to i16*
				store i16 %n16, i16* %arrayidx1_16
				ret void
				}

				qcolombetUnsubmitted Not Done Reply Inline Actions Use opt -instnamer qcolombet: Use opt -instnamer
				declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)

This is an archive of the discontinued LLVM Phabricator instance.

[DSE]Split memset when the memset is small enough to be lowered to stores
Needs Review · Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 59767

include/llvm/Analysis/TargetTransformInfo.h

include/llvm/Analysis/TargetTransformInfoImpl.h

include/llvm/CodeGen/BasicTTIImpl.h

lib/Analysis/TargetTransformInfo.cpp

lib/Transforms/Scalar/DeadStoreElimination.cpp

test/CodeGen/AArch64/aarch64-small-memset-lowering.ll

test/Transforms/DeadStoreElimination/SplitMemintrinsic.ll

This is an archive of the discontinued LLVM Phabricator instance.

[DSE]Split memset when the memset is small enough to be lowered to stores
Needs Review · Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 59767

include/llvm/Analysis/TargetTransformInfo.h

include/llvm/Analysis/TargetTransformInfoImpl.h

include/llvm/CodeGen/BasicTTIImpl.h

lib/Analysis/TargetTransformInfo.cpp

lib/Transforms/Scalar/DeadStoreElimination.cpp

test/CodeGen/AArch64/aarch64-small-memset-lowering.ll

test/Transforms/DeadStoreElimination/SplitMemintrinsic.ll

[DSE]Split memset when the memset is small enough to be lowered to stores
Needs Review · Public