Diff 288412

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

Show First 20 Lines • Show All 1,020 Lines • ▼ Show 20 Lines	bool Vectorizer::vectorizeStoreChain(
// Update Chain to the valid vectorizable subchain.		// Update Chain to the valid vectorizable subchain.
Chain = NewChain;		Chain = NewChain;
ChainSize = Chain.size();		ChainSize = Chain.size();

// Check if it's legal to vectorize this chain. If not, split the chain and		// Check if it's legal to vectorize this chain. If not, split the chain and
// try again.		// try again.
unsigned EltSzInBytes = Sz / 8;		unsigned EltSzInBytes = Sz / 8;
unsigned SzInBytes = EltSzInBytes * ChainSize;		unsigned SzInBytes = EltSzInBytes * ChainSize;

VectorType *VecTy;		FixedVectorType *VecTy;
		spatelUnsubmitted Done Reply Inline Actions Can VecTy be changed to FixedVectorType*? spatel: Can VecTy be changed to FixedVectorType*?
VectorType *VecStoreTy = dyn_cast<VectorType>(StoreTy);		auto *VecStoreTy = dyn_cast<FixedVectorType>(StoreTy);
if (VecStoreTy)		if (VecStoreTy)
VecTy = FixedVectorType::get(StoreTy->getScalarType(),		VecTy = FixedVectorType::get(StoreTy->getScalarType(),
Chain.size() * VecStoreTy->getNumElements());		Chain.size() * VecStoreTy->getNumElements());
else		else
VecTy = FixedVectorType::get(StoreTy, Chain.size());		VecTy = FixedVectorType::get(StoreTy, Chain.size());

// If it's more than the max vector size or the target has a better		// If it's more than the max vector size or the target has a better
// vector factor, break it into two pieces.		// vector factor, break it into two pieces.
▲ Show 20 Lines • Show All 135 Lines • ▼ Show 20 Lines	bool Vectorizer::vectorizeLoadChain(
Chain = NewChain;		Chain = NewChain;
ChainSize = Chain.size();		ChainSize = Chain.size();

// Check if it's legal to vectorize this chain. If not, split the chain and		// Check if it's legal to vectorize this chain. If not, split the chain and
// try again.		// try again.
unsigned EltSzInBytes = Sz / 8;		unsigned EltSzInBytes = Sz / 8;
unsigned SzInBytes = EltSzInBytes * ChainSize;		unsigned SzInBytes = EltSzInBytes * ChainSize;
VectorType *VecTy;		VectorType *VecTy;
VectorType *VecLoadTy = dyn_cast<VectorType>(LoadTy);		auto *VecLoadTy = dyn_cast<FixedVectorType>(LoadTy);
if (VecLoadTy)		if (VecLoadTy)
VecTy = FixedVectorType::get(LoadTy->getScalarType(),		VecTy = FixedVectorType::get(LoadTy->getScalarType(),
Chain.size() * VecLoadTy->getNumElements());		Chain.size() * VecLoadTy->getNumElements());
else		else
VecTy = FixedVectorType::get(LoadTy, Chain.size());		VecTy = FixedVectorType::get(LoadTy, Chain.size());

// If it's more than the max vector size or the target has a better		// If it's more than the max vector size or the target has a better
// vector factor, break it into two pieces.		// vector factor, break it into two pieces.
▲ Show 20 Lines • Show All 120 Lines • Show Last 20 Lines

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 2,029 Lines • ▼ Show 20 Lines	void InnerLoopVectorizer::widenIntOrFpInduction(PHINode IV, TruncInst Trunc) {
if (!Cost->isScalarEpilogueAllowed())		if (!Cost->isScalarEpilogueAllowed())
CreateSplatIV(ScalarIV, Step);		CreateSplatIV(ScalarIV, Step);
buildScalarSteps(ScalarIV, Step, EntryVal, ID);		buildScalarSteps(ScalarIV, Step, EntryVal, ID);
}		}

Value InnerLoopVectorizer::getStepVector(Value Val, int StartIdx, Value *Step,		Value InnerLoopVectorizer::getStepVector(Value Val, int StartIdx, Value *Step,
Instruction::BinaryOps BinOp) {		Instruction::BinaryOps BinOp) {
// Create and check the types.		// Create and check the types.
auto *ValVTy = cast<VectorType>(Val->getType());		auto *ValVTy = cast<FixedVectorType>(Val->getType());
int VLen = ValVTy->getNumElements();		int VLen = ValVTy->getNumElements();

Type *STy = Val->getType()->getScalarType();		Type *STy = Val->getType()->getScalarType();
assert((STy->isIntegerTy() \|\| STy->isFloatingPointTy()) &&		assert((STy->isIntegerTy() \|\| STy->isFloatingPointTy()) &&
"Induction Step must be an integer or FP");		"Induction Step must be an integer or FP");
assert(Step->getType() == STy && "Step has wrong type");		assert(Step->getType() == STy && "Step has wrong type");

SmallVector<Constant *, 8> Indices;		SmallVector<Constant *, 8> Indices;
▲ Show 20 Lines • Show All 747 Lines • ▼ Show 20 Lines	Value InnerLoopVectorizer::getOrCreateVectorTripCount(Loop L) {
VectorTripCount = Builder.CreateSub(TC, R, "n.vec");		VectorTripCount = Builder.CreateSub(TC, R, "n.vec");

return VectorTripCount;		return VectorTripCount;
}		}

Value InnerLoopVectorizer::createBitOrPointerCast(Value V, VectorType *DstVTy,		Value InnerLoopVectorizer::createBitOrPointerCast(Value V, VectorType *DstVTy,
const DataLayout &DL) {		const DataLayout &DL) {
// Verify that V is a vector type with same number of elements as DstVTy.		// Verify that V is a vector type with same number of elements as DstVTy.
assert(isa<FixedVectorType>(DstVTy) &&		auto *DstFVTy = cast<FixedVectorType>(DstVTy);
"Vector type is assumed to be fixed width.");		unsigned VF = DstFVTy->getNumElements();
unsigned VF = DstVTy->getNumElements();		auto *SrcVecTy = cast<FixedVectorType>(V->getType());
VectorType *SrcVecTy = cast<VectorType>(V->getType());
assert((VF == SrcVecTy->getNumElements()) && "Vector dimensions do not match");		assert((VF == SrcVecTy->getNumElements()) && "Vector dimensions do not match");
Type *SrcElemTy = SrcVecTy->getElementType();		Type *SrcElemTy = SrcVecTy->getElementType();
Type *DstElemTy = DstVTy->getElementType();		Type *DstElemTy = DstFVTy->getElementType();
assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&		assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
"Vector elements must have same size");		"Vector elements must have same size");

// Do a direct cast if element types are castable.		// Do a direct cast if element types are castable.
if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {		if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
return Builder.CreateBitOrPointerCast(V, DstVTy);		return Builder.CreateBitOrPointerCast(V, DstFVTy);
}		}
// V cannot be directly casted to desired vector type.		// V cannot be directly casted to desired vector type.
// May happen when V is a floating point vector but DstVTy is a vector of		// May happen when V is a floating point vector but DstVTy is a vector of
// pointers or vice-versa. Handle this using a two-step bitcast using an		// pointers or vice-versa. Handle this using a two-step bitcast using an
// intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.		// intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.
assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&		assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&
"Only one type should be a pointer type");		"Only one type should be a pointer type");
assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&		assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&
"Only one type should be a floating point type");		"Only one type should be a floating point type");
Type *IntTy =		Type *IntTy =
IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));		IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
auto *VecIntTy = FixedVectorType::get(IntTy, VF);		auto *VecIntTy = FixedVectorType::get(IntTy, VF);
Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);		Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
return Builder.CreateBitOrPointerCast(CastVal, DstVTy);		return Builder.CreateBitOrPointerCast(CastVal, DstFVTy);
}		}

void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,		void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
BasicBlock *Bypass) {		BasicBlock *Bypass) {
Value *Count = getOrCreateTripCount(L);		Value *Count = getOrCreateTripCount(L);
// Reuse existing vector loop preheader for TC checks.		// Reuse existing vector loop preheader for TC checks.
// Note that new preheader block is generated for vector loop.		// Note that new preheader block is generated for vector loop.
BasicBlock *const TCCheckBlock = LoopVectorPreHeader;		BasicBlock *const TCCheckBlock = LoopVectorPreHeader;
▲ Show 20 Lines • Show All 684 Lines • ▼ Show 20 Lines	for (const auto &KV : Cost->getMinimalBitwidths()) {
for (unsigned Part = 0; Part < UF; ++Part) {		for (unsigned Part = 0; Part < UF; ++Part) {
Value *I = getOrCreateVectorValue(KV.first, Part);		Value *I = getOrCreateVectorValue(KV.first, Part);
if (Erased.count(I) \|\| I->use_empty() \|\| !isa<Instruction>(I))		if (Erased.count(I) \|\| I->use_empty() \|\| !isa<Instruction>(I))
continue;		continue;
Type *OriginalTy = I->getType();		Type *OriginalTy = I->getType();
Type *ScalarTruncatedTy =		Type *ScalarTruncatedTy =
IntegerType::get(OriginalTy->getContext(), KV.second);		IntegerType::get(OriginalTy->getContext(), KV.second);
auto *TruncatedTy = FixedVectorType::get(		auto *TruncatedTy = FixedVectorType::get(
ScalarTruncatedTy, cast<VectorType>(OriginalTy)->getNumElements());		ScalarTruncatedTy,
		cast<FixedVectorType>(OriginalTy)->getNumElements());
if (TruncatedTy == OriginalTy)		if (TruncatedTy == OriginalTy)
continue;		continue;

IRBuilder<> B(cast<Instruction>(I));		IRBuilder<> B(cast<Instruction>(I));
auto ShrinkOperand = [&](Value V) -> Value {		auto ShrinkOperand = [&](Value V) -> Value {
if (auto *ZI = dyn_cast<ZExtInst>(V))		if (auto *ZI = dyn_cast<ZExtInst>(V))
if (ZI->getSrcTy() == TruncatedTy)		if (ZI->getSrcTy() == TruncatedTy)
return ZI->getOperand(0);		return ZI->getOperand(0);
Show All 33 Lines	for (unsigned Part = 0; Part < UF; ++Part) {
break;		break;
case Instruction::ZExt:		case Instruction::ZExt:
NewI = B.CreateZExtOrTrunc(		NewI = B.CreateZExtOrTrunc(
CI->getOperand(0),		CI->getOperand(0),
smallestIntegerVectorType(OriginalTy, TruncatedTy));		smallestIntegerVectorType(OriginalTy, TruncatedTy));
break;		break;
}		}
} else if (auto *SI = dyn_cast<ShuffleVectorInst>(I)) {		} else if (auto *SI = dyn_cast<ShuffleVectorInst>(I)) {
auto Elements0 =		auto Elements0 = cast<FixedVectorType>(SI->getOperand(0)->getType())
cast<VectorType>(SI->getOperand(0)->getType())->getNumElements();		->getNumElements();
auto *O0 = B.CreateZExtOrTrunc(		auto *O0 = B.CreateZExtOrTrunc(
SI->getOperand(0),		SI->getOperand(0),
FixedVectorType::get(ScalarTruncatedTy, Elements0));		FixedVectorType::get(ScalarTruncatedTy, Elements0));
auto Elements1 =		auto Elements1 = cast<FixedVectorType>(SI->getOperand(1)->getType())
cast<VectorType>(SI->getOperand(1)->getType())->getNumElements();		->getNumElements();
auto *O1 = B.CreateZExtOrTrunc(		auto *O1 = B.CreateZExtOrTrunc(
SI->getOperand(1),		SI->getOperand(1),
FixedVectorType::get(ScalarTruncatedTy, Elements1));		FixedVectorType::get(ScalarTruncatedTy, Elements1));

NewI = B.CreateShuffleVector(O0, O1, SI->getShuffleMask());		NewI = B.CreateShuffleVector(O0, O1, SI->getShuffleMask());
} else if (isa<LoadInst>(I) \|\| isa<PHINode>(I)) {		} else if (isa<LoadInst>(I) \|\| isa<PHINode>(I)) {
// Don't do anything with the operands, just extend the result.		// Don't do anything with the operands, just extend the result.
continue;		continue;
} else if (auto *IE = dyn_cast<InsertElementInst>(I)) {		} else if (auto *IE = dyn_cast<InsertElementInst>(I)) {
auto Elements =		auto Elements = cast<FixedVectorType>(IE->getOperand(0)->getType())
cast<VectorType>(IE->getOperand(0)->getType())->getNumElements();		->getNumElements();
auto *O0 = B.CreateZExtOrTrunc(		auto *O0 = B.CreateZExtOrTrunc(
IE->getOperand(0),		IE->getOperand(0),
FixedVectorType::get(ScalarTruncatedTy, Elements));		FixedVectorType::get(ScalarTruncatedTy, Elements));
auto *O1 = B.CreateZExtOrTrunc(IE->getOperand(1), ScalarTruncatedTy);		auto *O1 = B.CreateZExtOrTrunc(IE->getOperand(1), ScalarTruncatedTy);
NewI = B.CreateInsertElement(O0, O1, IE->getOperand(2));		NewI = B.CreateInsertElement(O0, O1, IE->getOperand(2));
} else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {		} else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
auto Elements =		auto Elements = cast<FixedVectorType>(EE->getOperand(0)->getType())
cast<VectorType>(EE->getOperand(0)->getType())->getNumElements();		->getNumElements();
auto *O0 = B.CreateZExtOrTrunc(		auto *O0 = B.CreateZExtOrTrunc(
EE->getOperand(0),		EE->getOperand(0),
FixedVectorType::get(ScalarTruncatedTy, Elements));		FixedVectorType::get(ScalarTruncatedTy, Elements));
NewI = B.CreateExtractElement(O0, EE->getOperand(2));		NewI = B.CreateExtractElement(O0, EE->getOperand(2));
} else {		} else {
// If we don't know what to do, be conservative and don't do anything.		// If we don't know what to do, be conservative and don't do anything.
continue;		continue;
}		}
▲ Show 20 Lines • Show All 4,974 Lines • Show Last 20 Lines

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 280 Lines • ▼ Show 20 Lines
/// %ins4 = insertelement <4 x i8> %ins3, i8 %9, i32 3		/// %ins4 = insertelement <4 x i8> %ins3, i8 %9, i32 3
/// ret <4 x i8> %ins4		/// ret <4 x i8> %ins4
/// InstCombiner transforms this into a shuffle and vector mul		/// InstCombiner transforms this into a shuffle and vector mul
/// TODO: Can we split off and reuse the shuffle mask detection from		/// TODO: Can we split off and reuse the shuffle mask detection from
/// TargetTransformInfo::getInstructionThroughput?		/// TargetTransformInfo::getInstructionThroughput?
static Optional<TargetTransformInfo::ShuffleKind>		static Optional<TargetTransformInfo::ShuffleKind>
isShuffle(ArrayRef<Value *> VL) {		isShuffle(ArrayRef<Value *> VL) {
auto *EI0 = cast<ExtractElementInst>(VL[0]);		auto *EI0 = cast<ExtractElementInst>(VL[0]);
unsigned Size = EI0->getVectorOperandType()->getNumElements();		unsigned Size =
		cast<FixedVectorType>(EI0->getVectorOperandType())->getNumElements();
Value *Vec1 = nullptr;		Value *Vec1 = nullptr;
Value *Vec2 = nullptr;		Value *Vec2 = nullptr;
enum ShuffleMode { Unknown, Select, Permute };		enum ShuffleMode { Unknown, Select, Permute };
ShuffleMode CommonShuffleMode = Unknown;		ShuffleMode CommonShuffleMode = Unknown;
for (unsigned I = 0, E = VL.size(); I < E; ++I) {		for (unsigned I = 0, E = VL.size(); I < E; ++I) {
auto *EI = cast<ExtractElementInst>(VL[I]);		auto *EI = cast<ExtractElementInst>(VL[I]);
auto *Vec = EI->getVectorOperand();		auto *Vec = EI->getVectorOperand();
// All vector operands must have the same number of vector elements.		// All vector operands must have the same number of vector elements.
if (cast<VectorType>(Vec->getType())->getNumElements() != Size)		if (cast<FixedVectorType>(Vec->getType())->getNumElements() != Size)
return None;		return None;
auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand());		auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand());
if (!Idx)		if (!Idx)
return None;		return None;
// Undefined behavior if Idx is negative or >= Size.		// Undefined behavior if Idx is negative or >= Size.
if (Idx->getValue().uge(Size))		if (Idx->getValue().uge(Size))
continue;		continue;
unsigned IntIdx = Idx->getValue().getZExtValue();		unsigned IntIdx = Idx->getValue().getZExtValue();
▲ Show 20 Lines • Show All 1,099 Lines • ▼ Show 20 Lines	private:
/// Vectorize a single entry in the tree.		/// Vectorize a single entry in the tree.
Value vectorizeTree(TreeEntry E);		Value vectorizeTree(TreeEntry E);

/// Vectorize a single entry in the tree, starting in \p VL.		/// Vectorize a single entry in the tree, starting in \p VL.
Value vectorizeTree(ArrayRef<Value > VL);		Value vectorizeTree(ArrayRef<Value > VL);

/// \returns the scalarization cost for this type. Scalarization in this		/// \returns the scalarization cost for this type. Scalarization in this
/// context means the creation of vectors from a group of scalars.		/// context means the creation of vectors from a group of scalars.
int getGatherCost(VectorType *Ty,		int getGatherCost(FixedVectorType *Ty,
const DenseSet<unsigned> &ShuffledIndices) const;		const DenseSet<unsigned> &ShuffledIndices) const;

/// \returns the scalarization cost for this list of values. Assuming that		/// \returns the scalarization cost for this list of values. Assuming that
/// this subtree gets vectorized, we may need to extract the values from the		/// this subtree gets vectorized, we may need to extract the values from the
/// roots. This method calculates the cost of extracting the values.		/// roots. This method calculates the cost of extracting the values.
int getGatherCost(ArrayRef<Value *> VL) const;		int getGatherCost(ArrayRef<Value *> VL) const;

/// Set the Builder insert point to one after the last instruction in		/// Set the Builder insert point to one after the last instruction in
/// the bundle		/// the bundle
void setInsertPointAfterBundle(TreeEntry *E);		void setInsertPointAfterBundle(TreeEntry *E);

/// \returns a vector from a collection of scalars in \p VL.		/// \returns a vector from a collection of scalars in \p VL.
Value Gather(ArrayRef<Value > VL, VectorType *Ty);		Value Gather(ArrayRef<Value > VL, FixedVectorType *Ty);

/// \returns whether the VectorizableTree is fully vectorizable and will		/// \returns whether the VectorizableTree is fully vectorizable and will
/// be beneficial even the tree height is tiny.		/// be beneficial even the tree height is tiny.
bool isFullyVectorizableTinyTree() const;		bool isFullyVectorizableTinyTree() const;

/// Reorder commutative or alt operands to get better probability of		/// Reorder commutative or alt operands to get better probability of
/// generating vectorized code.		/// generating vectorized code.
static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,		static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
▲ Show 20 Lines • Show All 1,725 Lines • ▼ Show 20 Lines	if (auto *ST = dyn_cast<StructType>(EltTy)) {
if (Ty != *ST->element_begin())		if (Ty != *ST->element_begin())
return 0;		return 0;
N *= ST->getNumElements();		N *= ST->getNumElements();
EltTy = *ST->element_begin();		EltTy = *ST->element_begin();
} else if (auto *AT = dyn_cast<ArrayType>(EltTy)) {		} else if (auto *AT = dyn_cast<ArrayType>(EltTy)) {
N *= AT->getNumElements();		N *= AT->getNumElements();
EltTy = AT->getElementType();		EltTy = AT->getElementType();
} else {		} else {
auto *VT = cast<VectorType>(EltTy);		auto *VT = cast<FixedVectorType>(EltTy);
N *= VT->getNumElements();		N *= VT->getNumElements();
EltTy = VT->getElementType();		EltTy = VT->getElementType();
}		}
}		}

if (!isValidElementType(EltTy))		if (!isValidElementType(EltTy))
return 0;		return 0;
uint64_t VTSize = DL.getTypeStoreSizeInBits(FixedVectorType::get(EltTy, N));		uint64_t VTSize = DL.getTypeStoreSizeInBits(FixedVectorType::get(EltTy, N));
Show All 21 Lines	if (E0->getOpcode() == Instruction::ExtractValue) {
NElts = canMapToVector(Vec->getType(), DL);		NElts = canMapToVector(Vec->getType(), DL);
if (!NElts)		if (!NElts)
return false;		return false;
// Check if load can be rewritten as load of vector.		// Check if load can be rewritten as load of vector.
LoadInst *LI = dyn_cast<LoadInst>(Vec);		LoadInst *LI = dyn_cast<LoadInst>(Vec);
if (!LI \|\| !LI->isSimple() \|\| !LI->hasNUses(VL.size()))		if (!LI \|\| !LI->isSimple() \|\| !LI->hasNUses(VL.size()))
return false;		return false;
} else {		} else {
NElts = cast<VectorType>(Vec->getType())->getNumElements();		NElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
}		}

if (NElts != VL.size())		if (NElts != VL.size())
return false;		return false;

// Check that all of the indices extract from the correct offset.		// Check that all of the indices extract from the correct offset.
bool ShouldKeepOrder = true;		bool ShouldKeepOrder = true;
unsigned E = VL.size();		unsigned E = VL.size();
Show All 34 Lines
bool BoUpSLP::areAllUsersVectorized(Instruction *I) const {		bool BoUpSLP::areAllUsersVectorized(Instruction *I) const {
return I->hasOneUse() \|\|		return I->hasOneUse() \|\|
std::all_of(I->user_begin(), I->user_end(), [this](User *U) {		std::all_of(I->user_begin(), I->user_end(), [this](User *U) {
return ScalarToTreeEntry.count(U) > 0;		return ScalarToTreeEntry.count(U) > 0;
});		});
}		}

static std::pair<unsigned, unsigned>		static std::pair<unsigned, unsigned>
getVectorCallCosts(CallInst CI, VectorType VecTy, TargetTransformInfo *TTI,		getVectorCallCosts(CallInst CI, FixedVectorType VecTy,
TargetLibraryInfo *TLI) {		TargetTransformInfo TTI, TargetLibraryInfo TLI) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);		Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);

// Calculate the cost of the scalar and vector calls.		// Calculate the cost of the scalar and vector calls.
IntrinsicCostAttributes CostAttrs(ID, *CI, VecTy->getNumElements());		IntrinsicCostAttributes CostAttrs(ID, *CI, VecTy->getNumElements());
int IntrinsicCost =		int IntrinsicCost =
TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);		TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);

auto Shape = VFShape::get(*CI, ElementCount::getFixed(static_cast<unsigned>(		auto Shape = VFShape::get(*CI, ElementCount::getFixed(static_cast<unsigned>(
▲ Show 20 Lines • Show All 655 Lines • ▼ Show 20 Lines	#ifndef NDEBUG
LLVM_DEBUG(dbgs() << Str);		LLVM_DEBUG(dbgs() << Str);
if (ViewSLPTree)		if (ViewSLPTree)
ViewGraph(this, "SLP" + F->getName(), false, Str);		ViewGraph(this, "SLP" + F->getName(), false, Str);
#endif		#endif

return Cost;		return Cost;
}		}

int BoUpSLP::getGatherCost(VectorType *Ty,		int BoUpSLP::getGatherCost(FixedVectorType *Ty,
const DenseSet<unsigned> &ShuffledIndices) const {		const DenseSet<unsigned> &ShuffledIndices) const {
unsigned NumElts = Ty->getNumElements();		unsigned NumElts = Ty->getNumElements();
APInt DemandedElts = APInt::getNullValue(NumElts);		APInt DemandedElts = APInt::getNullValue(NumElts);
for (unsigned i = 0; i < NumElts; ++i)		for (unsigned i = 0; i < NumElts; ++i)
if (!ShuffledIndices.count(i))		if (!ShuffledIndices.count(i))
DemandedElts.setBit(i);		DemandedElts.setBit(i);
int Cost = TTI->getScalarizationOverhead(Ty, DemandedElts, /Insert/ true,		int Cost = TTI->getScalarizationOverhead(Ty, DemandedElts, /Insert/ true,
/Extract/ false);		/Extract/ false);
▲ Show 20 Lines • Show All 96 Lines • ▼ Show 20 Lines	void BoUpSLP::setInsertPointAfterBundle(TreeEntry *E) {
assert(LastInst && "Failed to find last instruction in bundle");		assert(LastInst && "Failed to find last instruction in bundle");

// Set the insertion point after the last instruction in the bundle. Set the		// Set the insertion point after the last instruction in the bundle. Set the
// debug location to Front.		// debug location to Front.
Builder.SetInsertPoint(BB, ++LastInst->getIterator());		Builder.SetInsertPoint(BB, ++LastInst->getIterator());
Builder.SetCurrentDebugLocation(Front->getDebugLoc());		Builder.SetCurrentDebugLocation(Front->getDebugLoc());
}		}

Value BoUpSLP::Gather(ArrayRef<Value > VL, VectorType *Ty) {		Value BoUpSLP::Gather(ArrayRef<Value > VL, FixedVectorType *Ty) {
Value *Vec = UndefValue::get(Ty);		Value *Vec = UndefValue::get(Ty);
// Generate the 'InsertElement' instruction.		// Generate the 'InsertElement' instruction.
for (unsigned i = 0; i < Ty->getNumElements(); ++i) {		for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));		Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
if (auto *Insrt = dyn_cast<InsertElementInst>(Vec)) {		if (auto *Insrt = dyn_cast<InsertElementInst>(Vec)) {
GatherSeq.insert(Insrt);		GatherSeq.insert(Insrt);
CSEBlocks.insert(Insrt->getParent());		CSEBlocks.insert(Insrt->getParent());

▲ Show 20 Lines • Show All 3,683 Lines • Show Last 20 Lines

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Show First 20 Lines • Show All 422 Lines • ▼ Show 20 Lines	bool VectorCombine::foldBitcastShuf(Instruction &I) {
if (!match(&I, m_BitCast(		if (!match(&I, m_BitCast(
m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))		m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
return false;		return false;

// Disallow non-vector casts and length-changing shuffles.		// Disallow non-vector casts and length-changing shuffles.
// TODO: We could allow any shuffle.		// TODO: We could allow any shuffle.
auto *DestTy = dyn_cast<VectorType>(I.getType());		auto *DestTy = dyn_cast<VectorType>(I.getType());
auto *SrcTy = cast<VectorType>(V->getType());		auto *SrcTy = cast<VectorType>(V->getType());
if (!DestTy \|\| I.getOperand(0)->getType() != SrcTy)		if (!DestTy \|\| I.getOperand(0)->getType() != SrcTy)
		spatelUnsubmitted Done Reply Inline Actions IIUC, we can't safely cast to FixedVectorType at this point (the dyn_cast may have failed). Should we add a test like this: define <vscale x 4 x float> @scalable_bitcast_same_elt_size(<vscale x 4 x i32> %v) { %shuf = shufflevector <vscale x 4 x i32> %v, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer %r = bitcast <vscale x 4 x i32> %shuf to <vscale x 4 x float> ret <vscale x 4 x float> %r } I would add that in a preliminary commit myself, but it already crashes somewhere in the TTI cost model. spatel: IIUC, we can't safely cast to FixedVectorType at this point (the dyn_cast may have failed).
		ctetreauAuthorUnsubmitted Done Reply Inline Actions I can take a look at adding this test case and seeing if I can get to this line with a scalable vector. ctetreau: I can take a look at adding this test case and seeing if I can get to this line with a scalable…
		ctetreauAuthorUnsubmitted Done Reply Inline Actions Looking at this more closely, it occurs to me that the TTI stuff is completely unimplemented for scalable vectors currently, so I'm not going to be able to add this test case. That said, for now, the scope of the cast to FixedVectorType can be reduced to just the two calls to getNumElements(). In principle, it should be possible to implement this function in terms of ElementCount. I'll add a fixme for this. I think this should be safe, assuming it passes all the tests. ctetreau: Looking at this more closely, it occurs to me that the TTI stuff is completely unimplemented…
		spatelUnsubmitted Not Done Reply Inline Actions I think there's still a potential bug in doing the plain `cast<>` even if we can't show it in a test at this time. I.e., we should do: auto *SrcTy = dyn_cast<FixedVectorType>(V->getType()); if (!DestTy \|\| !SrcTy \|\| ...) spatel: I think there's still a potential bug in doing the plain `cast<>` even if we can't show it in a…
		ctetreauAuthorUnsubmitted Done Reply Inline Actions What is the bug? V must be nonnull because the matcher succeeded, and it must be a value with type VectorType, because it is the first argument to a shufflevector. What am I missing? ctetreau: What is the bug? V must be nonnull because the matcher succeeded, and it must be a value with…
		spatelUnsubmitted Not Done Reply Inline Actions The diff has changed, so the above code is fine now. But the diff below is nakedly casting to `FixedVectorType`. How did we ensure that our generic `VectorType` values actually are `FixedVectorType`? I.e., if we re-arrange this code for some reason to move the TTI checks after the diff below, then my test example will crash on this line: unsigned DestNumElts = cast<FixedVectorType>(DestTy)->getNumElements(); spatel: The diff has changed, so the above code is fine now. But the diff below is nakedly casting to…
		ctetreauAuthorUnsubmitted Done Reply Inline Actions Oh yeah, sorry about that. I posted the comments before I pushed the diff. To answer your question, nothing prevents foldBitcastShuf from being called with a scalable vector. It just happens to not happen yet. Prior to this change, the code just does the wrong thing for scalable vectors. It might just happen to work, given how shuffle masks for scalable vectors work, but if masks other than all 0 or all -1 are ever added, this will break down. For all these patches to remove calls to getNumElements(), the strategy has been to assume that existing calls to getNumElements() that don't check if the vector is scalable are explicitly expecting the vector to have fixed width. All these calls are being changed to unconditionally cast to FixedVectorType. If the existing tests don't break, then it must be the case that the assumption was correct. I acknowledge that on some level, substituting a miscompile with a crash is actually a behavior change. However, my goal has been to prevent different control flow paths from being taken after the point of the bug. At the point of a call to getNumElements(), either the compiler will crash if it gets a scalable vector, or it will behave the same as it did before. For this case, TTI.getShuffleCost will already crash if DestTy or SrcTy are scalable vectors, so this really is an NFC. ctetreau: Oh yeah, sorry about that. I posted the comments before I pushed the diff. To answer your…
		spatelUnsubmitted Not Done Reply Inline Actions Thanks for explaining. I'm ok with that reasoning. And as noted, we're going to crash in TTI if anyone tries to push scalable vector code through here or the other vector passes. Note: Since I became aware of the scalable vector changes that you've been making, I have tried to be safer by using dyn_cast<FixedVectorType> in this pass (because I don't know if the fixed vector transforms I'm looking at translate to scalable). So I'd have gone with some variation of the earlier version of this diff to avoid being blamed for crashing, but we can leave this as-is if that makes it easier to get scalable vectors off the ground. spatel: Thanks for explaining. I'm ok with that reasoning. And as noted, we're going to crash in TTI if…
return false;		return false;

// The new shuffle must not cost more than the old shuffle. The bitcast is		// The new shuffle must not cost more than the old shuffle. The bitcast is
// moved ahead of the shuffle, so assume that it has the same cost as before.		// moved ahead of the shuffle, so assume that it has the same cost as before.
if (TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy) >		if (TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DestTy) >
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy))		TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy))
return false;		return false;

unsigned DestNumElts = DestTy->getNumElements();		// FIXME: it should be possible to implement the computation of the widened
unsigned SrcNumElts = SrcTy->getNumElements();		// shuffle mask in terms of ElementCount to work with scalable shuffles.
		unsigned DestNumElts = cast<FixedVectorType>(DestTy)->getNumElements();
		unsigned SrcNumElts = cast<FixedVectorType>(SrcTy)->getNumElements();
SmallVector<int, 16> NewMask;		SmallVector<int, 16> NewMask;
if (SrcNumElts <= DestNumElts) {		if (SrcNumElts <= DestNumElts) {
// The bitcast is from wide to narrow/equal elements. The shuffle mask can		// The bitcast is from wide to narrow/equal elements. The shuffle mask can
// always be expanded to the equivalent form choosing narrower elements.		// always be expanded to the equivalent form choosing narrower elements.
assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");		assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
unsigned ScaleFactor = DestNumElts / SrcNumElts;		unsigned ScaleFactor = DestNumElts / SrcNumElts;
narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);		narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
} else {		} else {
▲ Show 20 Lines • Show All 313 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[SVE] Remove calls to VectorType::getNumElements from Transforms/Vectorize
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 288412

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[SVE] Remove calls to VectorType::getNumElements from Transforms/Vectorize
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 288412

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

[SVE] Remove calls to VectorType::getNumElements from Transforms/Vectorize
ClosedPublic