Diff 478406

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

Show First 20 Lines • Show All 180 Lines • ▼ Show 20 Lines	vectorizeLoadChain(ArrayRef<Instruction *> Chain,
SmallPtrSet<Instruction *, 16> *InstructionsProcessed);		SmallPtrSet<Instruction *, 16> *InstructionsProcessed);

/// Vectorizes the store instructions in Chain.		/// Vectorizes the store instructions in Chain.
bool		bool
vectorizeStoreChain(ArrayRef<Instruction *> Chain,		vectorizeStoreChain(ArrayRef<Instruction *> Chain,
SmallPtrSet<Instruction *, 16> *InstructionsProcessed);		SmallPtrSet<Instruction *, 16> *InstructionsProcessed);

/// Check if this load/store access is misaligned accesses.		/// Check if this load/store access is misaligned accesses.
		/// Returns a \p RelativeSpeed of an operation if allowed suitable to
		/// compare to another result for the same \p AddressSpace and potentially
		/// different \p Alignment and \p SzInBytes.
bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,		bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
Align Alignment);		Align Alignment, unsigned &RelativeSpeed);
};		};

class LoadStoreVectorizerLegacyPass : public FunctionPass {		class LoadStoreVectorizerLegacyPass : public FunctionPass {
public:		public:
static char ID;		static char ID;

LoadStoreVectorizerLegacyPass() : FunctionPass(ID) {		LoadStoreVectorizerLegacyPass() : FunctionPass(ID) {
initializeLoadStoreVectorizerLegacyPassPass(*PassRegistry::getPassRegistry());		initializeLoadStoreVectorizerLegacyPassPass(*PassRegistry::getPassRegistry());
▲ Show 20 Lines • Show All 874 Lines • ▼ Show 20 Lines	for (Instruction *I : Chain)
dbgs() << " " << *I << "\n";		dbgs() << " " << *I << "\n";
});		});

// We won't try again to vectorize the elements of the chain, regardless of		// We won't try again to vectorize the elements of the chain, regardless of
// whether we succeed below.		// whether we succeed below.
InstructionsProcessed->insert(Chain.begin(), Chain.end());		InstructionsProcessed->insert(Chain.begin(), Chain.end());

// If the store is going to be misaligned, don't vectorize it.		// If the store is going to be misaligned, don't vectorize it.
if (accessIsMisaligned(SzInBytes, AS, Alignment)) {		unsigned RelativeSpeed;
		if (accessIsMisaligned(SzInBytes, AS, Alignment, RelativeSpeed)) {
		arsenm [inline comment; Unsubmitted, Done]: Maybe rename to RelativeCost or something?
if (S0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) {		if (S0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) {
		unsigned SpeedBefore;
		accessIsMisaligned(EltSzInBytes, AS, Alignment, SpeedBefore);
		if (SpeedBefore > RelativeSpeed)
		foad [inline comment; Unsubmitted, Not Done]: It is not at all clear to me whether it makes sense to compare `FastBefore` with `Fast` directly, or whether one of them needs to be multiplied by `ChainSize`. Especially since the definition of `Fast` is target-specific.
		rampitec [Author; inline comment; Unsubmitted, Done]: I thought about this, but 'fast' is not an actual memory transfer speed like Gb/s; it is a speed rank. Defining real memory speed would be hard to do, especially given alignment considerations. I do not think this is additive like a cost. It can be compared, but hardly added or multiplied. More so when an ultimate 'slow' is still zero.
		foad [inline comment; Unsubmitted, Not Done]: I still don't understand how generic code can meaningfully compare, for example, one load with speed rank 42 vs two loads with speed rank 99. Are you supposed to ignore the number of loads completely, and just compare the speed rank numbers?
		rampitec [Author; inline comment; Unsubmitted, Done]: Yes, the rank behaves more like a throughput. It is not precisely a throughput because nobody can guarantee it, but it is a similar concept.
		return false;

auto Chains = splitOddVectorElts(Chain, Sz);		auto Chains = splitOddVectorElts(Chain, Sz);
bool Vectorized = false;		bool Vectorized = false;
Vectorized |= vectorizeStoreChain(Chains.first, InstructionsProcessed);		Vectorized |= vectorizeStoreChain(Chains.first, InstructionsProcessed);
Vectorized |= vectorizeStoreChain(Chains.second, InstructionsProcessed);		Vectorized |= vectorizeStoreChain(Chains.second, InstructionsProcessed);
return Vectorized;		return Vectorized;
}		}

Align NewAlign = getOrEnforceKnownAlignment(S0->getPointerOperand(),		Align NewAlign = getOrEnforceKnownAlignment(S0->getPointerOperand(),
▲ Show 20 Lines • Show All 135 Lines • ▼ Show 20 Lines	if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
return Vectorized;		return Vectorized;
}		}

// We won't try again to vectorize the elements of the chain, regardless of		// We won't try again to vectorize the elements of the chain, regardless of
// whether we succeed below.		// whether we succeed below.
InstructionsProcessed->insert(Chain.begin(), Chain.end());		InstructionsProcessed->insert(Chain.begin(), Chain.end());

// If the load is going to be misaligned, don't vectorize it.		// If the load is going to be misaligned, don't vectorize it.
if (accessIsMisaligned(SzInBytes, AS, Alignment)) {		unsigned RelativeSpeed;
		if (accessIsMisaligned(SzInBytes, AS, Alignment, RelativeSpeed)) {
if (L0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) {		if (L0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) {
		unsigned SpeedBefore;
		accessIsMisaligned(EltSzInBytes, AS, Alignment, SpeedBefore);
		if (SpeedBefore > RelativeSpeed)
		return false;

auto Chains = splitOddVectorElts(Chain, Sz);		auto Chains = splitOddVectorElts(Chain, Sz);
bool Vectorized = false;		bool Vectorized = false;
Vectorized |= vectorizeLoadChain(Chains.first, InstructionsProcessed);		Vectorized |= vectorizeLoadChain(Chains.first, InstructionsProcessed);
Vectorized |= vectorizeLoadChain(Chains.second, InstructionsProcessed);		Vectorized |= vectorizeLoadChain(Chains.second, InstructionsProcessed);
return Vectorized;		return Vectorized;
}		}

Align NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(),		Align NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(),
▲ Show 20 Lines • Show All 67 Lines • ▼ Show 20 Lines	bool Vectorizer::vectorizeLoadChain(
eraseInstructions(Chain);		eraseInstructions(Chain);

++NumVectorInstructions;		++NumVectorInstructions;
NumScalarsVectorized += Chain.size();		NumScalarsVectorized += Chain.size();
return true;		return true;
}		}

bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,		bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
Align Alignment) {		Align Alignment, unsigned &RelativeSpeed) {
		RelativeSpeed = 0;
if (Alignment.value() % SzInBytes == 0)		if (Alignment.value() % SzInBytes == 0)
return false;		return false;

unsigned Fast = 0;
bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(),		bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(),
SzInBytes * 8, AddressSpace,		SzInBytes * 8, AddressSpace,
Alignment, &Fast);		Alignment, &RelativeSpeed);
LLVM_DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows		LLVM_DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows
<< " and fast? " << Fast << "\n";);		<< " with relative speed = " << RelativeSpeed << '\n';);
return !Allows || !Fast;		return !Allows || !RelativeSpeed;
		arsenm [inline comment; Unsubmitted, Done]: Single quote newline
}		}

This is an archive of the discontinued LLVM Phabricator instance.

[LoadStoreVectorizer] Consider if operation is faster than before
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 478406

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[LoadStoreVectorizer] Consider if operation is faster than before
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 478406

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

[LoadStoreVectorizer] Consider if operation is faster than before
ClosedPublic