Diff 279365

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

Show First 20 Lines • Show All 1,021 Lines • ▼ Show 20 Lines	bool Vectorizer::vectorizeStoreChain(
Chain = NewChain;		Chain = NewChain;
ChainSize = Chain.size();		ChainSize = Chain.size();

// Check if it's legal to vectorize this chain. If not, split the chain and		// Check if it's legal to vectorize this chain. If not, split the chain and
// try again.		// try again.
unsigned EltSzInBytes = Sz / 8;		unsigned EltSzInBytes = Sz / 8;
unsigned SzInBytes = EltSzInBytes * ChainSize;		unsigned SzInBytes = EltSzInBytes * ChainSize;

VectorType *VecTy;		VectorType *VecTy;
		spatelUnsubmitted Done Reply Inline Actions Can VecTy be changed to FixedVectorType*? spatel: Can VecTy be changed to FixedVectorType*?
VectorType *VecStoreTy = dyn_cast<VectorType>(StoreTy);		auto *VecStoreTy = dyn_cast<FixedVectorType>(StoreTy);
if (VecStoreTy)		if (VecStoreTy)
VecTy = FixedVectorType::get(StoreTy->getScalarType(),		VecTy = FixedVectorType::get(StoreTy->getScalarType(),
Chain.size() * VecStoreTy->getNumElements());		Chain.size() * VecStoreTy->getNumElements());
else		else
VecTy = FixedVectorType::get(StoreTy, Chain.size());		VecTy = FixedVectorType::get(StoreTy, Chain.size());

// If it's more than the max vector size or the target has a better		// If it's more than the max vector size or the target has a better
// vector factor, break it into two pieces.		// vector factor, break it into two pieces.
▲ Show 20 Lines • Show All 135 Lines • ▼ Show 20 Lines	bool Vectorizer::vectorizeLoadChain(
Chain = NewChain;		Chain = NewChain;
ChainSize = Chain.size();		ChainSize = Chain.size();

// Check if it's legal to vectorize this chain. If not, split the chain and		// Check if it's legal to vectorize this chain. If not, split the chain and
// try again.		// try again.
unsigned EltSzInBytes = Sz / 8;		unsigned EltSzInBytes = Sz / 8;
unsigned SzInBytes = EltSzInBytes * ChainSize;		unsigned SzInBytes = EltSzInBytes * ChainSize;
VectorType *VecTy;		VectorType *VecTy;
VectorType *VecLoadTy = dyn_cast<VectorType>(LoadTy);		auto *VecLoadTy = dyn_cast<FixedVectorType>(LoadTy);
if (VecLoadTy)		if (VecLoadTy)
VecTy = FixedVectorType::get(LoadTy->getScalarType(),		VecTy = FixedVectorType::get(LoadTy->getScalarType(),
Chain.size() * VecLoadTy->getNumElements());		Chain.size() * VecLoadTy->getNumElements());
else		else
VecTy = FixedVectorType::get(LoadTy, Chain.size());		VecTy = FixedVectorType::get(LoadTy, Chain.size());

// If it's more than the max vector size or the target has a better		// If it's more than the max vector size or the target has a better
// vector factor, break it into two pieces.		// vector factor, break it into two pieces.
▲ Show 20 Lines • Show All 120 Lines • Show Last 20 Lines

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 624 Lines • ▼ Show 20 Lines	protected:
/// Returns (and creates if needed) the original loop trip count.		/// Returns (and creates if needed) the original loop trip count.
Value getOrCreateTripCount(Loop NewLoop);		Value getOrCreateTripCount(Loop NewLoop);

/// Returns (and creates if needed) the trip count of the widened loop.		/// Returns (and creates if needed) the trip count of the widened loop.
Value getOrCreateVectorTripCount(Loop NewLoop);		Value getOrCreateVectorTripCount(Loop NewLoop);

/// Returns a bitcasted value to the requested vector type.		/// Returns a bitcasted value to the requested vector type.
/// Also handles bitcasts of vector<float> <-> vector<pointer> types.		/// Also handles bitcasts of vector<float> <-> vector<pointer> types.
Value createBitOrPointerCast(Value V, VectorType *DstVTy,		Value createBitOrPointerCast(Value V, FixedVectorType *DstVTy,
const DataLayout &DL);		const DataLayout &DL);

/// Emit a bypass check to see if the vector trip count is zero, including if		/// Emit a bypass check to see if the vector trip count is zero, including if
/// it overflows.		/// it overflows.
void emitMinimumIterationCountCheck(Loop L, BasicBlock Bypass);		void emitMinimumIterationCountCheck(Loop L, BasicBlock Bypass);

/// Emit a bypass check to see if all of the SCEV assumptions we've		/// Emit a bypass check to see if all of the SCEV assumptions we've
/// had to make are correct.		/// had to make are correct.
▲ Show 20 Lines • Show All 1,279 Lines • ▼ Show 20 Lines	void InnerLoopVectorizer::widenIntOrFpInduction(PHINode IV, TruncInst Trunc) {
if (!Cost->isScalarEpilogueAllowed())		if (!Cost->isScalarEpilogueAllowed())
CreateSplatIV(ScalarIV, Step);		CreateSplatIV(ScalarIV, Step);
buildScalarSteps(ScalarIV, Step, EntryVal, ID);		buildScalarSteps(ScalarIV, Step, EntryVal, ID);
}		}

Value InnerLoopVectorizer::getStepVector(Value Val, int StartIdx, Value *Step,		Value InnerLoopVectorizer::getStepVector(Value Val, int StartIdx, Value *Step,
Instruction::BinaryOps BinOp) {		Instruction::BinaryOps BinOp) {
// Create and check the types.		// Create and check the types.
auto *ValVTy = cast<VectorType>(Val->getType());		auto *ValVTy = cast<FixedVectorType>(Val->getType());
int VLen = ValVTy->getNumElements();		int VLen = ValVTy->getNumElements();

Type *STy = Val->getType()->getScalarType();		Type *STy = Val->getType()->getScalarType();
assert((STy->isIntegerTy() \|\| STy->isFloatingPointTy()) &&		assert((STy->isIntegerTy() \|\| STy->isFloatingPointTy()) &&
"Induction Step must be an integer or FP");		"Induction Step must be an integer or FP");
assert(Step->getType() == STy && "Step has wrong type");		assert(Step->getType() == STy && "Step has wrong type");

SmallVector<Constant *, 8> Indices;		SmallVector<Constant *, 8> Indices;
▲ Show 20 Lines • Show All 359 Lines • ▼ Show 20 Lines	for (unsigned I = 0; I < InterleaveFactor; ++I) {

auto StrideMask = createStrideMask(I, InterleaveFactor, VF);		auto StrideMask = createStrideMask(I, InterleaveFactor, VF);
for (unsigned Part = 0; Part < UF; Part++) {		for (unsigned Part = 0; Part < UF; Part++) {
Value *StridedVec = Builder.CreateShuffleVector(		Value *StridedVec = Builder.CreateShuffleVector(
NewLoads[Part], UndefVec, StrideMask, "strided.vec");		NewLoads[Part], UndefVec, StrideMask, "strided.vec");

// If this member has different type, cast the result type.		// If this member has different type, cast the result type.
if (Member->getType() != ScalarTy) {		if (Member->getType() != ScalarTy) {
VectorType *OtherVTy = FixedVectorType::get(Member->getType(), VF);		auto *OtherVTy = FixedVectorType::get(Member->getType(), VF);
StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL);		StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL);
}		}

if (Group->isReverse())		if (Group->isReverse())
StridedVec = reverseVector(StridedVec);		StridedVec = reverseVector(StridedVec);

VectorLoopValueMap.setVectorValue(Member, Part, StridedVec);		VectorLoopValueMap.setVectorValue(Member, Part, StridedVec);
}		}
▲ Show 20 Lines • Show All 346 Lines • ▼ Show 20 Lines	if (VF > 1 && Cost->requiresScalarEpilogue()) {
R = Builder.CreateSelect(IsZero, Step, R);		R = Builder.CreateSelect(IsZero, Step, R);
}		}

VectorTripCount = Builder.CreateSub(TC, R, "n.vec");		VectorTripCount = Builder.CreateSub(TC, R, "n.vec");

return VectorTripCount;		return VectorTripCount;
}		}

Value InnerLoopVectorizer::createBitOrPointerCast(Value V, VectorType *DstVTy,		Value InnerLoopVectorizer::createBitOrPointerCast(Value V,
		FixedVectorType *DstVTy,
const DataLayout &DL) {		const DataLayout &DL) {
// Verify that V is a vector type with same number of elements as DstVTy.		// Verify that V is a vector type with same number of elements as DstVTy.
unsigned VF = DstVTy->getNumElements();		unsigned VF = DstVTy->getNumElements();
VectorType *SrcVecTy = cast<VectorType>(V->getType());		auto *SrcVecTy = cast<FixedVectorType>(V->getType());
assert((VF == SrcVecTy->getNumElements()) && "Vector dimensions do not match");		assert((VF == SrcVecTy->getNumElements()) && "Vector dimensions do not match");
Type *SrcElemTy = SrcVecTy->getElementType();		Type *SrcElemTy = SrcVecTy->getElementType();
Type *DstElemTy = DstVTy->getElementType();		Type *DstElemTy = DstVTy->getElementType();
assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&		assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
"Vector elements must have same size");		"Vector elements must have same size");

// Do a direct cast if element types are castable.		// Do a direct cast if element types are castable.
if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {		if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
▲ Show 20 Lines • Show All 687 Lines • ▼ Show 20 Lines	for (const auto &KV : Cost->getMinimalBitwidths()) {
for (unsigned Part = 0; Part < UF; ++Part) {		for (unsigned Part = 0; Part < UF; ++Part) {
Value *I = getOrCreateVectorValue(KV.first, Part);		Value *I = getOrCreateVectorValue(KV.first, Part);
if (Erased.count(I) \|\| I->use_empty() \|\| !isa<Instruction>(I))		if (Erased.count(I) \|\| I->use_empty() \|\| !isa<Instruction>(I))
continue;		continue;
Type *OriginalTy = I->getType();		Type *OriginalTy = I->getType();
Type *ScalarTruncatedTy =		Type *ScalarTruncatedTy =
IntegerType::get(OriginalTy->getContext(), KV.second);		IntegerType::get(OriginalTy->getContext(), KV.second);
auto *TruncatedTy = FixedVectorType::get(		auto *TruncatedTy = FixedVectorType::get(
ScalarTruncatedTy, cast<VectorType>(OriginalTy)->getNumElements());		ScalarTruncatedTy,
		cast<FixedVectorType>(OriginalTy)->getNumElements());
if (TruncatedTy == OriginalTy)		if (TruncatedTy == OriginalTy)
continue;		continue;

IRBuilder<> B(cast<Instruction>(I));		IRBuilder<> B(cast<Instruction>(I));
auto ShrinkOperand = [&](Value V) -> Value {		auto ShrinkOperand = [&](Value V) -> Value {
if (auto *ZI = dyn_cast<ZExtInst>(V))		if (auto *ZI = dyn_cast<ZExtInst>(V))
if (ZI->getSrcTy() == TruncatedTy)		if (ZI->getSrcTy() == TruncatedTy)
return ZI->getOperand(0);		return ZI->getOperand(0);
Show All 33 Lines	for (unsigned Part = 0; Part < UF; ++Part) {
break;		break;
case Instruction::ZExt:		case Instruction::ZExt:
NewI = B.CreateZExtOrTrunc(		NewI = B.CreateZExtOrTrunc(
CI->getOperand(0),		CI->getOperand(0),
smallestIntegerVectorType(OriginalTy, TruncatedTy));		smallestIntegerVectorType(OriginalTy, TruncatedTy));
break;		break;
}		}
} else if (auto *SI = dyn_cast<ShuffleVectorInst>(I)) {		} else if (auto *SI = dyn_cast<ShuffleVectorInst>(I)) {
auto Elements0 =		auto Elements0 = cast<FixedVectorType>(SI->getOperand(0)->getType())
cast<VectorType>(SI->getOperand(0)->getType())->getNumElements();		->getNumElements();
auto *O0 = B.CreateZExtOrTrunc(		auto *O0 = B.CreateZExtOrTrunc(
SI->getOperand(0),		SI->getOperand(0),
FixedVectorType::get(ScalarTruncatedTy, Elements0));		FixedVectorType::get(ScalarTruncatedTy, Elements0));
auto Elements1 =		auto Elements1 = cast<FixedVectorType>(SI->getOperand(1)->getType())
cast<VectorType>(SI->getOperand(1)->getType())->getNumElements();		->getNumElements();
auto *O1 = B.CreateZExtOrTrunc(		auto *O1 = B.CreateZExtOrTrunc(
SI->getOperand(1),		SI->getOperand(1),
FixedVectorType::get(ScalarTruncatedTy, Elements1));		FixedVectorType::get(ScalarTruncatedTy, Elements1));

NewI = B.CreateShuffleVector(O0, O1, SI->getShuffleMask());		NewI = B.CreateShuffleVector(O0, O1, SI->getShuffleMask());
} else if (isa<LoadInst>(I) \|\| isa<PHINode>(I)) {		} else if (isa<LoadInst>(I) \|\| isa<PHINode>(I)) {
// Don't do anything with the operands, just extend the result.		// Don't do anything with the operands, just extend the result.
continue;		continue;
} else if (auto *IE = dyn_cast<InsertElementInst>(I)) {		} else if (auto *IE = dyn_cast<InsertElementInst>(I)) {
auto Elements =		auto Elements = cast<FixedVectorType>(IE->getOperand(0)->getType())
cast<VectorType>(IE->getOperand(0)->getType())->getNumElements();		->getNumElements();
auto *O0 = B.CreateZExtOrTrunc(		auto *O0 = B.CreateZExtOrTrunc(
IE->getOperand(0),		IE->getOperand(0),
FixedVectorType::get(ScalarTruncatedTy, Elements));		FixedVectorType::get(ScalarTruncatedTy, Elements));
auto *O1 = B.CreateZExtOrTrunc(IE->getOperand(1), ScalarTruncatedTy);		auto *O1 = B.CreateZExtOrTrunc(IE->getOperand(1), ScalarTruncatedTy);
NewI = B.CreateInsertElement(O0, O1, IE->getOperand(2));		NewI = B.CreateInsertElement(O0, O1, IE->getOperand(2));
} else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {		} else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
auto Elements =		auto Elements = cast<FixedVectorType>(EE->getOperand(0)->getType())
cast<VectorType>(EE->getOperand(0)->getType())->getNumElements();		->getNumElements();
auto *O0 = B.CreateZExtOrTrunc(		auto *O0 = B.CreateZExtOrTrunc(
EE->getOperand(0),		EE->getOperand(0),
FixedVectorType::get(ScalarTruncatedTy, Elements));		FixedVectorType::get(ScalarTruncatedTy, Elements));
NewI = B.CreateExtractElement(O0, EE->getOperand(2));		NewI = B.CreateExtractElement(O0, EE->getOperand(2));
} else {		} else {
// If we don't know what to do, be conservative and don't do anything.		// If we don't know what to do, be conservative and don't do anything.
continue;		continue;
}		}
▲ Show 20 Lines • Show All 4,697 Lines • Show Last 20 Lines

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 279 Lines • ▼ Show 20 Lines
/// %ins4 = insertelement <4 x i8> %ins3, i8 %9, i32 3		/// %ins4 = insertelement <4 x i8> %ins3, i8 %9, i32 3
/// ret <4 x i8> %ins4		/// ret <4 x i8> %ins4
/// InstCombiner transforms this into a shuffle and vector mul		/// InstCombiner transforms this into a shuffle and vector mul
/// TODO: Can we split off and reuse the shuffle mask detection from		/// TODO: Can we split off and reuse the shuffle mask detection from
/// TargetTransformInfo::getInstructionThroughput?		/// TargetTransformInfo::getInstructionThroughput?
static Optional<TargetTransformInfo::ShuffleKind>		static Optional<TargetTransformInfo::ShuffleKind>
isShuffle(ArrayRef<Value *> VL) {		isShuffle(ArrayRef<Value *> VL) {
auto *EI0 = cast<ExtractElementInst>(VL[0]);		auto *EI0 = cast<ExtractElementInst>(VL[0]);
unsigned Size = EI0->getVectorOperandType()->getNumElements();		unsigned Size =
		cast<FixedVectorType>(EI0->getVectorOperandType())->getNumElements();
Value *Vec1 = nullptr;		Value *Vec1 = nullptr;
Value *Vec2 = nullptr;		Value *Vec2 = nullptr;
enum ShuffleMode { Unknown, Select, Permute };		enum ShuffleMode { Unknown, Select, Permute };
ShuffleMode CommonShuffleMode = Unknown;		ShuffleMode CommonShuffleMode = Unknown;
for (unsigned I = 0, E = VL.size(); I < E; ++I) {		for (unsigned I = 0, E = VL.size(); I < E; ++I) {
auto *EI = cast<ExtractElementInst>(VL[I]);		auto *EI = cast<ExtractElementInst>(VL[I]);
auto *Vec = EI->getVectorOperand();		auto *Vec = EI->getVectorOperand();
// All vector operands must have the same number of vector elements.		// All vector operands must have the same number of vector elements.
if (cast<VectorType>(Vec->getType())->getNumElements() != Size)		if (cast<FixedVectorType>(Vec->getType())->getNumElements() != Size)
return None;		return None;
auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand());		auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand());
if (!Idx)		if (!Idx)
return None;		return None;
// Undefined behavior if Idx is negative or >= Size.		// Undefined behavior if Idx is negative or >= Size.
if (Idx->getValue().uge(Size))		if (Idx->getValue().uge(Size))
continue;		continue;
unsigned IntIdx = Idx->getValue().getZExtValue();		unsigned IntIdx = Idx->getValue().getZExtValue();
▲ Show 20 Lines • Show All 1,099 Lines • ▼ Show 20 Lines	private:
/// Vectorize a single entry in the tree.		/// Vectorize a single entry in the tree.
Value vectorizeTree(TreeEntry E);		Value vectorizeTree(TreeEntry E);

/// Vectorize a single entry in the tree, starting in \p VL.		/// Vectorize a single entry in the tree, starting in \p VL.
Value vectorizeTree(ArrayRef<Value > VL);		Value vectorizeTree(ArrayRef<Value > VL);

/// \returns the scalarization cost for this type. Scalarization in this		/// \returns the scalarization cost for this type. Scalarization in this
/// context means the creation of vectors from a group of scalars.		/// context means the creation of vectors from a group of scalars.
int getGatherCost(VectorType *Ty,		int getGatherCost(FixedVectorType *Ty,
const DenseSet<unsigned> &ShuffledIndices) const;		const DenseSet<unsigned> &ShuffledIndices) const;

/// \returns the scalarization cost for this list of values. Assuming that		/// \returns the scalarization cost for this list of values. Assuming that
/// this subtree gets vectorized, we may need to extract the values from the		/// this subtree gets vectorized, we may need to extract the values from the
/// roots. This method calculates the cost of extracting the values.		/// roots. This method calculates the cost of extracting the values.
int getGatherCost(ArrayRef<Value *> VL) const;		int getGatherCost(ArrayRef<Value *> VL) const;

/// Set the Builder insert point to one after the last instruction in		/// Set the Builder insert point to one after the last instruction in
/// the bundle		/// the bundle
void setInsertPointAfterBundle(TreeEntry *E);		void setInsertPointAfterBundle(TreeEntry *E);

/// \returns a vector from a collection of scalars in \p VL.		/// \returns a vector from a collection of scalars in \p VL.
Value Gather(ArrayRef<Value > VL, VectorType *Ty);		Value Gather(ArrayRef<Value > VL, FixedVectorType *Ty);

/// \returns whether the VectorizableTree is fully vectorizable and will		/// \returns whether the VectorizableTree is fully vectorizable and will
/// be beneficial even the tree height is tiny.		/// be beneficial even the tree height is tiny.
bool isFullyVectorizableTinyTree() const;		bool isFullyVectorizableTinyTree() const;

/// Reorder commutative or alt operands to get better probability of		/// Reorder commutative or alt operands to get better probability of
/// generating vectorized code.		/// generating vectorized code.
static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,		static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
▲ Show 20 Lines • Show All 1,725 Lines • ▼ Show 20 Lines	if (auto *ST = dyn_cast<StructType>(EltTy)) {
if (Ty != *ST->element_begin())		if (Ty != *ST->element_begin())
return 0;		return 0;
N *= ST->getNumElements();		N *= ST->getNumElements();
EltTy = *ST->element_begin();		EltTy = *ST->element_begin();
} else if (auto *AT = dyn_cast<ArrayType>(EltTy)) {		} else if (auto *AT = dyn_cast<ArrayType>(EltTy)) {
N *= AT->getNumElements();		N *= AT->getNumElements();
EltTy = AT->getElementType();		EltTy = AT->getElementType();
} else {		} else {
auto *VT = cast<VectorType>(EltTy);		auto *VT = cast<FixedVectorType>(EltTy);
N *= VT->getNumElements();		N *= VT->getNumElements();
EltTy = VT->getElementType();		EltTy = VT->getElementType();
}		}
}		}

if (!isValidElementType(EltTy))		if (!isValidElementType(EltTy))
return 0;		return 0;
uint64_t VTSize = DL.getTypeStoreSizeInBits(FixedVectorType::get(EltTy, N));		uint64_t VTSize = DL.getTypeStoreSizeInBits(FixedVectorType::get(EltTy, N));
Show All 21 Lines	if (E0->getOpcode() == Instruction::ExtractValue) {
NElts = canMapToVector(Vec->getType(), DL);		NElts = canMapToVector(Vec->getType(), DL);
if (!NElts)		if (!NElts)
return false;		return false;
// Check if load can be rewritten as load of vector.		// Check if load can be rewritten as load of vector.
LoadInst *LI = dyn_cast<LoadInst>(Vec);		LoadInst *LI = dyn_cast<LoadInst>(Vec);
if (!LI \|\| !LI->isSimple() \|\| !LI->hasNUses(VL.size()))		if (!LI \|\| !LI->isSimple() \|\| !LI->hasNUses(VL.size()))
return false;		return false;
} else {		} else {
NElts = cast<VectorType>(Vec->getType())->getNumElements();		NElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
}		}

if (NElts != VL.size())		if (NElts != VL.size())
return false;		return false;

// Check that all of the indices extract from the correct offset.		// Check that all of the indices extract from the correct offset.
bool ShouldKeepOrder = true;		bool ShouldKeepOrder = true;
unsigned E = VL.size();		unsigned E = VL.size();
Show All 34 Lines
bool BoUpSLP::areAllUsersVectorized(Instruction *I) const {		bool BoUpSLP::areAllUsersVectorized(Instruction *I) const {
return I->hasOneUse() \|\|		return I->hasOneUse() \|\|
std::all_of(I->user_begin(), I->user_end(), [this](User *U) {		std::all_of(I->user_begin(), I->user_end(), [this](User *U) {
return ScalarToTreeEntry.count(U) > 0;		return ScalarToTreeEntry.count(U) > 0;
});		});
}		}

static std::pair<unsigned, unsigned>		static std::pair<unsigned, unsigned>
getVectorCallCosts(CallInst CI, VectorType VecTy, TargetTransformInfo *TTI,		getVectorCallCosts(CallInst CI, FixedVectorType VecTy,
TargetLibraryInfo *TLI) {		TargetTransformInfo TTI, TargetLibraryInfo TLI) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);		Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);

// Calculate the cost of the scalar and vector calls.		// Calculate the cost of the scalar and vector calls.
IntrinsicCostAttributes CostAttrs(ID, *CI, VecTy->getNumElements());		IntrinsicCostAttributes CostAttrs(ID, *CI, VecTy->getNumElements());
int IntrinsicCost =		int IntrinsicCost =
TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);		TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);

auto Shape =		auto Shape =
▲ Show 20 Lines • Show All 640 Lines • ▼ Show 20 Lines	int BoUpSLP::getTreeCost() {
LLVM_DEBUG(dbgs() << Str);		LLVM_DEBUG(dbgs() << Str);

if (ViewSLPTree)		if (ViewSLPTree)
ViewGraph(this, "SLP" + F->getName(), false, Str);		ViewGraph(this, "SLP" + F->getName(), false, Str);

return Cost;		return Cost;
}		}

int BoUpSLP::getGatherCost(VectorType *Ty,		int BoUpSLP::getGatherCost(FixedVectorType *Ty,
const DenseSet<unsigned> &ShuffledIndices) const {		const DenseSet<unsigned> &ShuffledIndices) const {
unsigned NumElts = Ty->getNumElements();		unsigned NumElts = Ty->getNumElements();
APInt DemandedElts = APInt::getNullValue(NumElts);		APInt DemandedElts = APInt::getNullValue(NumElts);
for (unsigned i = 0; i < NumElts; ++i)		for (unsigned i = 0; i < NumElts; ++i)
if (!ShuffledIndices.count(i))		if (!ShuffledIndices.count(i))
DemandedElts.setBit(i);		DemandedElts.setBit(i);
int Cost = TTI->getScalarizationOverhead(Ty, DemandedElts, /Insert/ true,		int Cost = TTI->getScalarizationOverhead(Ty, DemandedElts, /Insert/ true,
/Extract/ false);		/Extract/ false);
▲ Show 20 Lines • Show All 96 Lines • ▼ Show 20 Lines	void BoUpSLP::setInsertPointAfterBundle(TreeEntry *E) {
assert(LastInst && "Failed to find last instruction in bundle");		assert(LastInst && "Failed to find last instruction in bundle");

// Set the insertion point after the last instruction in the bundle. Set the		// Set the insertion point after the last instruction in the bundle. Set the
// debug location to Front.		// debug location to Front.
Builder.SetInsertPoint(BB, ++LastInst->getIterator());		Builder.SetInsertPoint(BB, ++LastInst->getIterator());
Builder.SetCurrentDebugLocation(Front->getDebugLoc());		Builder.SetCurrentDebugLocation(Front->getDebugLoc());
}		}

Value BoUpSLP::Gather(ArrayRef<Value > VL, VectorType *Ty) {		Value BoUpSLP::Gather(ArrayRef<Value > VL, FixedVectorType *Ty) {
Value *Vec = UndefValue::get(Ty);		Value *Vec = UndefValue::get(Ty);
// Generate the 'InsertElement' instruction.		// Generate the 'InsertElement' instruction.
for (unsigned i = 0; i < Ty->getNumElements(); ++i) {		for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));		Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
if (auto *Insrt = dyn_cast<InsertElementInst>(Vec)) {		if (auto *Insrt = dyn_cast<InsertElementInst>(Vec)) {
GatherSeq.insert(Insrt);		GatherSeq.insert(Insrt);
CSEBlocks.insert(Insrt->getParent());		CSEBlocks.insert(Insrt->getParent());

▲ Show 20 Lines • Show All 3,600 Lines • Show Last 20 Lines

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Show First 20 Lines • Show All 362 Lines • ▼ Show 20 Lines	bool VectorCombine::foldBitcastShuf(Instruction &I) {
Value *V;		Value *V;
ArrayRef<int> Mask;		ArrayRef<int> Mask;
if (!match(&I, m_BitCast(		if (!match(&I, m_BitCast(
m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))		m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
return false;		return false;

// Disallow non-vector casts and length-changing shuffles.		// Disallow non-vector casts and length-changing shuffles.
// TODO: We could allow any shuffle.		// TODO: We could allow any shuffle.
auto *DestTy = dyn_cast<VectorType>(I.getType());		auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
auto *SrcTy = cast<VectorType>(V->getType());		auto *SrcTy = cast<FixedVectorType>(V->getType());
		spatelUnsubmitted Done Reply Inline Actions IIUC, we can't safely cast to FixedVectorType at this point (the dyn_cast may have failed). Should we add a test like this: define <vscale x 4 x float> @scalable_bitcast_same_elt_size(<vscale x 4 x i32> %v) { %shuf = shufflevector <vscale x 4 x i32> %v, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer %r = bitcast <vscale x 4 x i32> %shuf to <vscale x 4 x float> ret <vscale x 4 x float> %r } I would add that in a preliminary commit myself, but it already crashes somewhere in the TTI cost model. spatel: IIUC, we can't safely cast to FixedVectorType at this point (the dyn_cast may have failed).
		ctetreauAuthorUnsubmitted Done Reply Inline Actions I can take a look at adding this test case and seeing if I can get to this line with a scalable vector. ctetreau: I can take a look at adding this test case and seeing if I can get to this line with a scalable…
		ctetreauAuthorUnsubmitted Done Reply Inline Actions Looking at this more closely, it occurs to me that the TTI stuff is completely unimplemented for scalable vectors currently, so I'm not going to be able to add this test case. That said, for now, the scope of the cast to FixedVectorType can be reduced to just the two calls to getNumElements(). In principle, it should be possible to implement this function in terms of ElementCount. I'll add a fixme for this. I think this should be safe, assuming it passes all the tests. ctetreau: Looking at this more closely, it occurs to me that the TTI stuff is completely unimplemented…
		spatelUnsubmitted Not Done Reply Inline Actions I think there's still a potential bug in doing the plain `cast<>` even if we can't show it in a test at this time. I.e., we should do: auto *SrcTy = dyn_cast<FixedVectorType>(V->getType()); if (!DestTy \|\| !SrcTy \|\| ...) spatel: I think there's still a potential bug in doing the plain `cast<>` even if we can't show it in a…
		ctetreauAuthorUnsubmitted Done Reply Inline Actions What is the bug? V must be nonnull because the matcher succeeded, and it must be a value with type VectorType, because it is the first argument to a shufflevector. What am I missing? ctetreau: What is the bug? V must be nonnull because the matcher succeeded, and it must be a value with…
		spatelUnsubmitted Not Done Reply Inline Actions The diff has changed, so the above code is fine now. But the diff below is nakedly casting to `FixedVectorType`. How did we ensure that our generic `VectorType` values actually are `FixedVectorType`? Ie, if we re-arrange this code for some reason to move the TTI checks after the diff below, then my test example will crash on this line: unsigned DestNumElts = cast<FixedVectorType>(DestTy)->getNumElements(); spatel: The diff has changed, so the above code is fine now. But the diff below is nakedly casting to…
		ctetreauAuthorUnsubmitted Done Reply Inline Actions Oh yeah, sorry about that. I posted the comments before I pushed the diff. To answer your question, nothing prevents foldBitcastShuf from being called with a scalable vector. It just happens to not happen yet. Prior to this change, the code just does the wrong thing for scalable vectors. It might just happen to work, given how shuffle masks for scalable vectors work, but if masks other than all 0 or all -1 are ever added, this will break down. For all these patches to remove calls to getNumElements(), the strategy has been to assume that existing calls to getNumElements() that don't check if the vector is scalable are explicitly expecting the vector to have fixed width. All these calls are being changed to unconditionally cast to FixedVectorType. If the existing tests don't break, then it must be the case that the assumption was correct. I acknowledge that on some level, substituting a miscompile with a crash is actually a behavior change. However, my goal has been to prevent different control flow paths from being taken after the point of the bug. At the point of a call to getNumElements(), either the compiler will crash if it gets a scalable vector, or it will behave the same as it did before. For this case, TTI.getShuffleCost will already crash if DestTy or SrcTy are scalable vectors, so this really is an NFC. ctetreau: Oh yeah, sorry about that. I posted the comments before I pushed the diff. To answer your…
		spatelUnsubmitted Not Done Reply Inline Actions Thanks for explaining. I'm ok with that reasoning. And as noted, we're going to crash in TTI if anyone tries to push scalable vector code through here or the other vector passes. Note: Since I became aware of the scalable vector changes that you've been making, I have tried to be safer by using dyn_cast<FixedVectorType> in this pass (because I don't know if the fixed vector transforms I'm looking at translate to scalable). So I'd have gone with some variation of the earlier version of this diff to avoid being blamed for crashing, but we can leave this as-is if that makes it easier to get scalable vectors off the ground. spatel: Thanks for explaining. I'm ok with that reasoning. And as noted, we're going to crash in TTI if…
if (!DestTy \|\| I.getOperand(0)->getType() != SrcTy)		if (!DestTy \|\| I.getOperand(0)->getType() != SrcTy)
return false;		return false;

// The new shuffle must not cost more than the old shuffle. The bitcast is		// The new shuffle must not cost more than the old shuffle. The bitcast is
// moved ahead of the shuffle, so assume that it has the same cost as before.		// moved ahead of the shuffle, so assume that it has the same cost as before.
if (TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy) >		if (TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy) >
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy))		TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy))
return false;		return false;
▲ Show 20 Lines • Show All 319 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[SVE] Remove calls to VectorType::getNumElements from Transforms/Vectorize
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 279365

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[SVE] Remove calls to VectorType::getNumElements from Transforms/Vectorize
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 279365

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

[SVE] Remove calls to VectorType::getNumElements from Transforms/Vectorize
ClosedPublic