This is an archive of the discontinued LLVM Phabricator instance.

Differential D156068

[WIP] Vectorization for __builtin_prefetch
Needs ReviewPublic

Authored by m-saito-fj on Jul 23 2023, 7:00 PM.

Download Raw Diff

This revision needs review, but there are no reviewers specified.

Details

Reviewers: None

Summary

This patch enables vectorization of loops that contain a __builtin_prefetch call, such as the following:

void foo(double * restrict a, double * restrict b, int n){
  int i;
  for(i=0; i<n; ++i){
    a[i] = a[i] + b[i];
    __builtin_prefetch(&(b[i+8]));
  }
}

Two intrinsics are added: masked_prefetch for contiguous prefetch and masked_gather_prefetch for gather prefetch.
In vectorization, the implementation basically uses the Load/Store processing path.

https://discourse.llvm.org/t/rfc-loop-vectorization-for-builtin-prefetch/72234

Diff Detail

Event Timeline

m-saito-fj created this revision.Jul 23 2023, 7:00 PM

Herald added a project: Restricted Project. · View Herald TranscriptJul 23 2023, 7:00 PM

Herald added subscribers: rogfer01, hiraditya. · View Herald Transcript

m-saito-fj requested review of this revision.Jul 23 2023, 7:00 PM

Herald added subscribers: llvm-commits, wangpc, vkmr, jdoerfert. · View Herald TranscriptJul 23 2023, 7:00 PM

m-saito-fj edited the summary of this revision. (Show Details)Jul 23 2023, 7:01 PM

m-saito-fj edited the summary of this revision. (Show Details)Jul 23 2023, 7:25 PM

Harbormaster completed remote builds in B247540: Diff 543343.Jul 23 2023, 8:25 PM

mdchen added a subscriber: mdchen.Aug 1 2023, 1:46 AM

Revision Contents

Path

Size

llvm/

include/

llvm/

Analysis/

TargetTransformInfo.h

12 lines

TargetTransformInfoImpl.h

8 lines

CodeGen/

BasicTTIImpl.h

16 lines

IR/

IRBuilder.h

11 lines

IntrinsicInst.h

100 lines

Intrinsics.td

15 lines

lib/

Analysis/

TargetTransformInfo.cpp

10 lines

VectorUtils.cpp

1 line

IR/

IRBuilder.cpp

59 lines

Transforms/

Vectorize/

LoopVectorizationLegality.cpp

2 lines

LoopVectorize.cpp

157 lines

VPlan.h

14 lines

VPlanRecipes.cpp

8 lines

Diff 543343

llvm/include/llvm/Analysis/TargetTransformInfo.h

Show First 20 Lines • Show All 737 Lines • ▼ Show 20 Lines	public:
/// Return the preferred addressing mode LSR should make efforts to generate.		/// Return the preferred addressing mode LSR should make efforts to generate.
AddressingModeKind getPreferredAddressingMode(const Loop *L,		AddressingModeKind getPreferredAddressingMode(const Loop *L,
ScalarEvolution *SE) const;		ScalarEvolution *SE) const;

/// Return true if the target supports masked store.		/// Return true if the target supports masked store.
bool isLegalMaskedStore(Type *DataType, Align Alignment) const;		bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
/// Return true if the target supports masked load.		/// Return true if the target supports masked load.
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;		bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
		/// Return true if the target supports masked prefetch.
		bool isLegalMaskedPrefetch(Type *DataType, Align Alignment) const;

/// Return true if the target supports nontemporal store.		/// Return true if the target supports nontemporal store.
bool isLegalNTStore(Type *DataType, Align Alignment) const;		bool isLegalNTStore(Type *DataType, Align Alignment) const;
/// Return true if the target supports nontemporal load.		/// Return true if the target supports nontemporal load.
bool isLegalNTLoad(Type *DataType, Align Alignment) const;		bool isLegalNTLoad(Type *DataType, Align Alignment) const;

/// \Returns true if the target supports broadcasting a load to a vector of		/// \Returns true if the target supports broadcasting a load to a vector of
/// type <NumElements x ElementTy>.		/// type <NumElements x ElementTy>.
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;		bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;

/// Return true if the target supports masked scatter.		/// Return true if the target supports masked scatter.
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;		bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
/// Return true if the target supports masked gather.		/// Return true if the target supports masked gather.
bool isLegalMaskedGather(Type *DataType, Align Alignment) const;		bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
		/// Return true if the target supports masked gather prefetch.
		bool isLegalMaskedGatherPrefetch(Type *DataType, Align Alignment) const;
/// Return true if the target forces scalarizing of llvm.masked.gather		/// Return true if the target forces scalarizing of llvm.masked.gather
/// intrinsics.		/// intrinsics.
bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;		bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;
/// Return true if the target forces scalarizing of llvm.masked.scatter		/// Return true if the target forces scalarizing of llvm.masked.scatter
/// intrinsics.		/// intrinsics.
bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;		bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;

/// Return true if the target supports masked compress store.		/// Return true if the target supports masked compress store.
▲ Show 20 Lines • Show All 996 Lines • ▼ Show 20 Lines	public:
virtual bool canMacroFuseCmp() = 0;		virtual bool canMacroFuseCmp() = 0;
virtual bool canSaveCmp(Loop L, BranchInst BI, ScalarEvolution SE,		virtual bool canSaveCmp(Loop L, BranchInst BI, ScalarEvolution SE,
LoopInfo LI, DominatorTree DT, AssumptionCache *AC,		LoopInfo LI, DominatorTree DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo) = 0;		TargetLibraryInfo *LibInfo) = 0;
virtual AddressingModeKind		virtual AddressingModeKind
getPreferredAddressingMode(const Loop L, ScalarEvolution SE) const = 0;		getPreferredAddressingMode(const Loop L, ScalarEvolution SE) const = 0;
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;		virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;		virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
		virtual bool isLegalMaskedPrefetch(Type *DataType, Align Alignment) = 0;
virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;		virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;		virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
virtual bool isLegalBroadcastLoad(Type *ElementTy,		virtual bool isLegalBroadcastLoad(Type *ElementTy,
ElementCount NumElements) const = 0;		ElementCount NumElements) const = 0;
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;		virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;		virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
		virtual bool isLegalMaskedGatherPrefetch(Type *DataType, Align Alignment) = 0;
virtual bool forceScalarizeMaskedGather(VectorType *DataType,		virtual bool forceScalarizeMaskedGather(VectorType *DataType,
Align Alignment) = 0;		Align Alignment) = 0;
virtual bool forceScalarizeMaskedScatter(VectorType *DataType,		virtual bool forceScalarizeMaskedScatter(VectorType *DataType,
Align Alignment) = 0;		Align Alignment) = 0;
virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;		virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;		virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,		virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
unsigned Opcode1,		unsigned Opcode1,
▲ Show 20 Lines • Show All 434 Lines • ▼ Show 20 Lines	AddressingModeKind
return Impl.getPreferredAddressingMode(L, SE);		return Impl.getPreferredAddressingMode(L, SE);
}		}
bool isLegalMaskedStore(Type *DataType, Align Alignment) override {		bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
return Impl.isLegalMaskedStore(DataType, Alignment);		return Impl.isLegalMaskedStore(DataType, Alignment);
}		}
bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {		bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
return Impl.isLegalMaskedLoad(DataType, Alignment);		return Impl.isLegalMaskedLoad(DataType, Alignment);
}		}
		// Forwards the masked-prefetch legality query to Impl unchanged.
		bool isLegalMaskedPrefetch(Type *DataType, Align Alignment) override {
		return Impl.isLegalMaskedPrefetch(DataType, Alignment);
		}
bool isLegalNTStore(Type *DataType, Align Alignment) override {		bool isLegalNTStore(Type *DataType, Align Alignment) override {
return Impl.isLegalNTStore(DataType, Alignment);		return Impl.isLegalNTStore(DataType, Alignment);
}		}
bool isLegalNTLoad(Type *DataType, Align Alignment) override {		bool isLegalNTLoad(Type *DataType, Align Alignment) override {
return Impl.isLegalNTLoad(DataType, Alignment);		return Impl.isLegalNTLoad(DataType, Alignment);
}		}
bool isLegalBroadcastLoad(Type *ElementTy,		bool isLegalBroadcastLoad(Type *ElementTy,
ElementCount NumElements) const override {		ElementCount NumElements) const override {
return Impl.isLegalBroadcastLoad(ElementTy, NumElements);		return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
}		}
bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {		bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
return Impl.isLegalMaskedScatter(DataType, Alignment);		return Impl.isLegalMaskedScatter(DataType, Alignment);
}		}
bool isLegalMaskedGather(Type *DataType, Align Alignment) override {		bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
return Impl.isLegalMaskedGather(DataType, Alignment);		return Impl.isLegalMaskedGather(DataType, Alignment);
}		}
		// Forwards the masked-gather-prefetch legality query to Impl unchanged.
		bool isLegalMaskedGatherPrefetch(Type *DataType, Align Alignment) override {
		return Impl.isLegalMaskedGatherPrefetch(DataType, Alignment);
		}
bool forceScalarizeMaskedGather(VectorType *DataType,		bool forceScalarizeMaskedGather(VectorType *DataType,
Align Alignment) override {		Align Alignment) override {
return Impl.forceScalarizeMaskedGather(DataType, Alignment);		return Impl.forceScalarizeMaskedGather(DataType, Alignment);
}		}
bool forceScalarizeMaskedScatter(VectorType *DataType,		bool forceScalarizeMaskedScatter(VectorType *DataType,
Align Alignment) override {		Align Alignment) override {
return Impl.forceScalarizeMaskedScatter(DataType, Alignment);		return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
}		}
▲ Show 20 Lines • Show All 598 Lines • Show Last 20 Lines

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Show First 20 Lines • Show All 248 Lines • ▼ Show 20 Lines	public:
bool isLegalMaskedStore(Type *DataType, Align Alignment) const {		bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
return false;		return false;
}		}

bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {		bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
return false;		return false;
}		}

		// Default implementation: masked prefetch is reported as not legal for
		// every type and alignment.
		bool isLegalMaskedPrefetch(Type *DataType, Align Alignment) const {
		return false;
		}

bool isLegalNTStore(Type *DataType, Align Alignment) const {		bool isLegalNTStore(Type *DataType, Align Alignment) const {
// By default, assume nontemporal memory stores are available for stores		// By default, assume nontemporal memory stores are available for stores
// that are aligned and have a size that is a power of 2.		// that are aligned and have a size that is a power of 2.
unsigned DataSize = DL.getTypeStoreSize(DataType);		unsigned DataSize = DL.getTypeStoreSize(DataType);
return Alignment >= DataSize && isPowerOf2_32(DataSize);		return Alignment >= DataSize && isPowerOf2_32(DataSize);
}		}

bool isLegalNTLoad(Type *DataType, Align Alignment) const {		bool isLegalNTLoad(Type *DataType, Align Alignment) const {
Show All 10 Lines	public:
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {		bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
return false;		return false;
}		}

bool isLegalMaskedGather(Type *DataType, Align Alignment) const {		bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
return false;		return false;
}		}

		// Default implementation: masked gather prefetch is reported as not legal
		// for every type and alignment.
		bool isLegalMaskedGatherPrefetch(Type *DataType, Align Alignment) const {
		return false;
		}

bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {		bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
return false;		return false;
}		}

bool forceScalarizeMaskedScatter(VectorType *DataType,		bool forceScalarizeMaskedScatter(VectorType *DataType,
Align Alignment) const {		Align Alignment) const {
return false;		return false;
}		}
▲ Show 20 Lines • Show All 1,096 Lines • Show Last 20 Lines

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Show First 20 Lines • Show All 1,550 Lines • ▼ Show 20 Lines	InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
}		}
case Intrinsic::masked_gather: {		case Intrinsic::masked_gather: {
const Value *Mask = Args[2];		const Value *Mask = Args[2];
bool VarMask = !isa<Constant>(Mask);		bool VarMask = !isa<Constant>(Mask);
Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();		Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],		return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
VarMask, Alignment, CostKind, I);		VarMask, Alignment, CostKind, I);
}		}
		case Intrinsic::masked_gather_prefetch: {
		  // The mask is read from operand 4 and the alignment from operand 1.
		  const Value *Mask = Args[4];
		  bool VarMask = !isa<Constant>(Mask);
		  Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
		  auto *MaskVT = cast<VectorType>(Mask->getType());
		  // The intrinsic has no data type of its own, so build a stand-in vector
		  // type with the mask's lane count and lanes of (alignment * 8) bits to
		  // hand to the gather/scatter cost hook.
		  auto *PseudoDataTy = MaskVT->getWithNewBitWidth(Alignment.value() * 8);
		  return thisT()->getGatherScatterOpCost(Instruction::Call, PseudoDataTy,
		                                         Args[0], VarMask, Alignment,
		                                         CostKind, I);
		}
case Intrinsic::experimental_stepvector: {		case Intrinsic::experimental_stepvector: {
if (isa<ScalableVectorType>(RetTy))		if (isa<ScalableVectorType>(RetTy))
return BaseT::getIntrinsicInstrCost(ICA, CostKind);		return BaseT::getIntrinsicInstrCost(ICA, CostKind);
// The cost of materialising a constant integer vector.		// The cost of materialising a constant integer vector.
return TargetTransformInfo::TCC_Basic;		return TargetTransformInfo::TCC_Basic;
}		}
case Intrinsic::vector_extract: {		case Intrinsic::vector_extract: {
// FIXME: Handle case where a scalable vector is extracted from a scalable		// FIXME: Handle case where a scalable vector is extracted from a scalable
▲ Show 20 Lines • Show All 298 Lines • ▼ Show 20 Lines	case Intrinsic::masked_store: {
CostKind);		CostKind);
}		}
case Intrinsic::masked_load: {		case Intrinsic::masked_load: {
Type *Ty = RetTy;		Type *Ty = RetTy;
Align TyAlign = thisT()->DL.getABITypeAlign(Ty);		Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,		return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,
CostKind);		CostKind);
}		}
		case Intrinsic::masked_prefetch: {
		  // Cost the prefetch as a masked memory operation on a stand-in vector
		  // type: same lane count as the mask (argument type 4), 32-bit lanes.
		  Type *PseudoTy =
		      cast<VectorType>(ICA.getArgTypes()[4])->getWithNewBitWidth(32);
		  return thisT()->getMaskedMemoryOpCost(
		      Instruction::Call, PseudoTy, thisT()->DL.getABITypeAlign(PseudoTy), 0,
		      CostKind);
		}
case Intrinsic::vector_reduce_add:		case Intrinsic::vector_reduce_add:
return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy,		return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy,
std::nullopt, CostKind);		std::nullopt, CostKind);
case Intrinsic::vector_reduce_mul:		case Intrinsic::vector_reduce_mul:
return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy,		return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
std::nullopt, CostKind);		std::nullopt, CostKind);
case Intrinsic::vector_reduce_and:		case Intrinsic::vector_reduce_and:
return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy,		return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy,
▲ Show 20 Lines • Show All 593 Lines • Show Last 20 Lines

llvm/include/llvm/IR/IRBuilder.h

Show First 20 Lines • Show All 786 Lines • ▼ Show 20 Lines	public:
/// Create a call to Masked Load intrinsic		/// Create a call to Masked Load intrinsic
CallInst CreateMaskedLoad(Type Ty, Value Ptr, Align Alignment, Value Mask,		CallInst CreateMaskedLoad(Type Ty, Value Ptr, Align Alignment, Value Mask,
Value *PassThru = nullptr, const Twine &Name = "");		Value *PassThru = nullptr, const Twine &Name = "");

/// Create a call to Masked Store intrinsic		/// Create a call to Masked Store intrinsic
CallInst CreateMaskedStore(Value Val, Value *Ptr, Align Alignment,		CallInst CreateMaskedStore(Value Val, Value *Ptr, Align Alignment,
Value *Mask);		Value *Mask);

		/// Create a call to Masked Prefetch intrinsic
		CallInst *CreateMaskedPrefetch(Type *Ty, Value *Ptr, Align Alignment, Value *Mask,
		                               Value *RW = nullptr, Value *Locality = nullptr,
		                               const Twine &Name = "");

/// Create a call to Masked Gather intrinsic		/// Create a call to Masked Gather intrinsic
CallInst CreateMaskedGather(Type Ty, Value *Ptrs, Align Alignment,		CallInst CreateMaskedGather(Type Ty, Value *Ptrs, Align Alignment,
Value Mask = nullptr, Value PassThru = nullptr,		Value Mask = nullptr, Value PassThru = nullptr,
const Twine &Name = "");		const Twine &Name = "");

/// Create a call to Masked Scatter intrinsic		/// Create a call to Masked Scatter intrinsic
CallInst CreateMaskedScatter(Value Val, Value *Ptrs, Align Alignment,		CallInst CreateMaskedScatter(Value Val, Value *Ptrs, Align Alignment,
Value *Mask = nullptr);		Value *Mask = nullptr);

		/// Create a call to Masked Gather Prefetch intrinsic
		CallInst *CreateMaskedGatherPrefetch(Type *Ty, Value *Ptrs, Align Alignment,
		                                     Value *Mask = nullptr, Value *RW = nullptr,
		                                     Value *Locality = nullptr,
		                                     const Twine &Name = "");

/// Create a call to Masked Expand Load intrinsic		/// Create a call to Masked Expand Load intrinsic
CallInst CreateMaskedExpandLoad(Type Ty, Value Ptr, Value Mask = nullptr,		CallInst CreateMaskedExpandLoad(Type Ty, Value Ptr, Value Mask = nullptr,
Value *PassThru = nullptr,		Value *PassThru = nullptr,
const Twine &Name = "");		const Twine &Name = "");

/// Create a call to Masked Compress Store intrinsic		/// Create a call to Masked Compress Store intrinsic
CallInst CreateMaskedCompressStore(Value Val, Value *Ptr,		CallInst CreateMaskedCompressStore(Value Val, Value *Ptr,
Value *Mask = nullptr);		Value *Mask = nullptr);
▲ Show 20 Lines • Show All 1,868 Lines • Show Last 20 Lines

llvm/include/llvm/IR/IntrinsicInst.h

Show First 20 Lines • Show All 1,322 Lines • ▼ Show 20 Lines	default:
return false;		return false;
}		}
}		}
static bool classof(const Value *V) {		static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));		return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}		}
};		};

		/// This class represents the prefetch intrinsic,
		/// i.e. llvm.prefetch
		class PrefetchInst : public IntrinsicInst {
		public:
		// Support for isa/cast/dyn_cast: matches only Intrinsic::prefetch.
		static bool classof(const IntrinsicInst *I) {
		return I->getIntrinsicID() == Intrinsic::prefetch;
		}
		// Value overload: V must first be an IntrinsicInst, then the ID is checked.
		static bool classof(const Value *V) {
		return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
		}

		// The pointer operand is call operand 0.
		Value *getPointerOperand() { return getOperand(0); }
		const Value *getPointerOperand() const { return getOperand(0); }
		static unsigned getPointerOperandIndex() { return 0U; }
		// Type of the pointer operand itself.
		Type *getPointerOperandType() const { return getPointerOperand()->getType(); }
		};

		/// A helper function that returns the pointer operand of a prefetch
		/// instruction. Returns nullptr if \p V is not a prefetch.
		inline const Value *getPrefetchPointerOperand(const Value *V) {
		  if (auto *Prefetch = dyn_cast<PrefetchInst>(V))
		    return Prefetch->getPointerOperand();
		  return nullptr;
		}
		/// Non-const overload; delegates to the const version above.
		inline Value *getPrefetchPointerOperand(Value *V) {
		  return const_cast<Value *>(
		      getPrefetchPointerOperand(static_cast<const Value *>(V)));
		}

		/// A helper function that returns the address space of the pointer operand of
		/// prefetch instruction. \p I must be a PrefetchInst.
		inline unsigned getPrefetchAddressSpace(Value *I) {
		  assert(isa<PrefetchInst>(I) && "Expected prefetch instruction");
		  // The kind is already established, so use cast<> rather than
		  // dereferencing an unchecked dyn_cast<> result.
		  auto *PtrTy = cast<PrefetchInst>(I)->getPointerOperandType();
		  return cast<PointerType>(PtrTy)->getAddressSpace();
		}

		/// A helper function that returns the type of a prefetch instruction.
		///
		/// The type is derived from the instruction that defines the pointer
		/// operand: the source element type of a GEP, or the allocated type of an
		/// alloca. Array types are peeled down to their innermost element type, and
		/// a struct element type is replaced by i64. Returns nullptr when the
		/// pointer operand is neither a GEP nor an alloca.
		inline Type *getPrefetchType(Value *I) {
		  assert(isa<PrefetchInst>(I) && "Expected Prefetch instruction");
		  auto *Prefetch = cast<PrefetchInst>(I);
		  Value *Ptr = Prefetch->getPointerOperand();

		  Type *ElemTy = nullptr;
		  if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr))
		    ElemTy = GEP->getSourceElementType();
		  else if (auto *Alloca = dyn_cast<AllocaInst>(Ptr))
		    ElemTy = Alloca->getAllocatedType();
		  else
		    return nullptr;

		  // Peel every array level. This also covers an alloca whose allocated type
		  // is not an array, where calling getArrayElementType() would be invalid.
		  while (auto *ArrTy = dyn_cast<ArrayType>(ElemTy))
		    ElemTy = ArrTy->getElementType();

		  return isa<StructType>(ElemTy) ? Type::getInt64Ty(I->getContext()) : ElemTy;
		}

		/// A helper function that returns the alignment of prefetch instruction.
		///
		/// The alignment is the scalar size in bytes of the type reported by
		/// getPrefetchType(). Falls back to Align(1) when no type is available or
		/// when that byte size is zero or not a power of two, because Align only
		/// accepts non-zero powers of two.
		inline Align getPrefetchAlignment(Value *I) {
		  assert(isa<PrefetchInst>(I) && "Expected Prefetch instruction");
		  auto *Ty = getPrefetchType(I);
		  if (!Ty)
		    return Align(1ULL);
		  uint64_t Bytes = Ty->getScalarSizeInBits() >> 3;
		  bool IsPowerOfTwo = Bytes != 0 && (Bytes & (Bytes - 1)) == 0;
		  return IsPowerOfTwo ? Align(Bytes) : Align(1ULL);
		}

		/// Alignment of a load, store, or prefetch instruction: prefetches use
		/// getPrefetchAlignment(), everything else uses getLoadStoreAlignment().
		inline Align getLdStPfAlignment(Value *I) {
		  return isa<PrefetchInst>(I) ? getPrefetchAlignment(I)
		                              : getLoadStoreAlignment(I);
		}

		/// A helper function that returns the pointer operand of a load/store/prefetch
		/// instruction. Prefetches are handled by getPrefetchPointerOperand();
		/// every other value is handed to getLoadStorePointerOperand().
		inline const Value *getLdStPfPointerOperand(const Value *I) {
		  if (isa<PrefetchInst>(I))
		    return getPrefetchPointerOperand(I);
		  return getLoadStorePointerOperand(I);
		}
		/// Non-const overload; delegates to the const version above.
		inline Value *getLdStPfPointerOperand(Value *V) {
		  return const_cast<Value *>(
		      getLdStPfPointerOperand(static_cast<const Value *>(V)));
		}

		/// Address space of the pointer operand of a load, store, or prefetch
		/// instruction: prefetches use getPrefetchAddressSpace(), everything else
		/// uses getLoadStoreAddressSpace().
		inline unsigned getLdStPfAddressSpace(Value *I) {
		  return isa<PrefetchInst>(I) ? getPrefetchAddressSpace(I)
		                              : getLoadStoreAddressSpace(I);
		}

		/// A helper function that returns the type of a load/store/prefetch instruction.
		/// Prefetches use getPrefetchType(); everything else uses
		/// getLoadStoreType().
		inline Type *getLdStPfType(Value *I) {
		  if (isa<PrefetchInst>(I))
		    return getPrefetchType(I);
		  return getLoadStoreType(I);
		}

/// This class represents any memmove intrinsic		/// This class represents any memmove intrinsic
/// i.e. llvm.element.unordered.atomic.memmove		/// i.e. llvm.element.unordered.atomic.memmove
/// and llvm.memmove		/// and llvm.memmove
class AnyMemMoveInst : public AnyMemTransferInst {		class AnyMemMoveInst : public AnyMemTransferInst {
public:		public:
static bool classof(const IntrinsicInst *I) {		static bool classof(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {		switch (I->getIntrinsicID()) {
case Intrinsic::memmove:		case Intrinsic::memmove:
▲ Show 20 Lines • Show All 268 Lines • Show Last 20 Lines

llvm/include/llvm/IR/Intrinsics.td

	Show First 20 Lines • Show All 2,202 Lines • ▼ Show 20 Lines

	def int_masked_compressstore:			def int_masked_compressstore:
	DefaultAttrsIntrinsic<[],			DefaultAttrsIntrinsic<[],
	[llvm_anyvector_ty, llvm_ptr_ty,			[llvm_anyvector_ty, llvm_ptr_ty,
	LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],			LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
	[IntrWriteMem, IntrArgMemOnly, IntrWillReturn,			[IntrWriteMem, IntrArgMemOnly, IntrWillReturn,
	NoCapture<ArgIndex<1>>]>;			NoCapture<ArgIndex<1>>]>;

				// Masked prefetch through a single pointer; no result value.
				// Operand types: any pointer, three i32 values, any vector.
				// IRBuilderBase::CreateMaskedPrefetch passes them as
				// (Ptr, Alignment, RW, Locality, Mask).
				// NOTE(review): ImmArg covers operands 1 and 2 only - confirm whether
				// operand 3 should be an immediate as well.
				def int_masked_prefetch:
				DefaultAttrsIntrinsic<[],
				[llvm_anyptr_ty,
				llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty],
				[IntrInaccessibleMemOrArgMemOnly, IntrWillReturn,
				ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;

				// Masked gather prefetch; no result value.
				// Operand types: any vector, three i32 values, and an i1 mask with the
				// same vector width as operand 0.
				// IRBuilderBase::CreateMaskedGatherPrefetch passes them as
				// (Ptrs, Alignment, RW, Locality, Mask).
				// NOTE(review): ImmArg covers operands 1 and 2 only - confirm whether
				// operand 3 should be an immediate as well.
				def int_masked_gather_prefetch:
				DefaultAttrsIntrinsic<[],
				[llvm_anyvector_ty,
				llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
				LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
				[IntrInaccessibleMemOrArgMemOnly, IntrWillReturn,
				ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;

	// Test whether a pointer is associated with a type metadata identifier.			// Test whether a pointer is associated with a type metadata identifier.
	def int_type_test : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty],			def int_type_test : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty],
	[IntrNoMem, IntrWillReturn, IntrSpeculatable]>;			[IntrNoMem, IntrWillReturn, IntrSpeculatable]>;

	// Safely loads a function pointer from a virtual table pointer using type metadata.			// Safely loads a function pointer from a virtual table pointer using type metadata.
	def int_type_checked_load : DefaultAttrsIntrinsic<[llvm_ptr_ty, llvm_i1_ty],			def int_type_checked_load : DefaultAttrsIntrinsic<[llvm_ptr_ty, llvm_i1_ty],
	[llvm_ptr_ty, llvm_i32_ty, llvm_metadata_ty],			[llvm_ptr_ty, llvm_i32_ty, llvm_metadata_ty],
	[IntrNoMem, IntrWillReturn]>;			[IntrNoMem, IntrWillReturn]>;
	▲ Show 20 Lines • Show All 324 Lines • Show Last 20 Lines

llvm/lib/Analysis/TargetTransformInfo.cpp

Show First 20 Lines • Show All 427 Lines • ▼ Show 20 Lines	bool TargetTransformInfo::isLegalMaskedStore(Type *DataType,
return TTIImpl->isLegalMaskedStore(DataType, Alignment);		return TTIImpl->isLegalMaskedStore(DataType, Alignment);
}		}

bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,		bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
Align Alignment) const {		Align Alignment) const {
return TTIImpl->isLegalMaskedLoad(DataType, Alignment);		return TTIImpl->isLegalMaskedLoad(DataType, Alignment);
}		}

		// Forwards the masked-prefetch legality query to TTIImpl.
		bool TargetTransformInfo::isLegalMaskedPrefetch(Type *DataType,
		Align Alignment) const {
		return TTIImpl->isLegalMaskedPrefetch(DataType, Alignment);
		}

bool TargetTransformInfo::isLegalNTStore(Type *DataType,		bool TargetTransformInfo::isLegalNTStore(Type *DataType,
Align Alignment) const {		Align Alignment) const {
return TTIImpl->isLegalNTStore(DataType, Alignment);		return TTIImpl->isLegalNTStore(DataType, Alignment);
}		}

bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const {		bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const {
return TTIImpl->isLegalNTLoad(DataType, Alignment);		return TTIImpl->isLegalNTLoad(DataType, Alignment);
}		}
Show All 32 Lines
bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {		bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {
return TTIImpl->isLegalMaskedCompressStore(DataType);		return TTIImpl->isLegalMaskedCompressStore(DataType);
}		}

bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {		bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
return TTIImpl->isLegalMaskedExpandLoad(DataType);		return TTIImpl->isLegalMaskedExpandLoad(DataType);
}		}

		// Forwards the masked-gather-prefetch legality query to TTIImpl.
		bool TargetTransformInfo::isLegalMaskedGatherPrefetch(Type *DataType,
		Align Alignment) const {
		return TTIImpl->isLegalMaskedGatherPrefetch(DataType, Alignment);
		}

bool TargetTransformInfo::enableOrderedReductions() const {		bool TargetTransformInfo::enableOrderedReductions() const {
return TTIImpl->enableOrderedReductions();		return TTIImpl->enableOrderedReductions();
}		}

bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {		bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
return TTIImpl->hasDivRemOp(DataType, IsSigned);		return TTIImpl->hasDivRemOp(DataType, IsSigned);
}		}

▲ Show 20 Lines • Show All 797 Lines • Show Last 20 Lines

llvm/lib/Analysis/VectorUtils.cpp

Show First 20 Lines • Show All 86 Lines • ▼ Show 20 Lines	bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::pow:		case Intrinsic::pow:
case Intrinsic::fma:		case Intrinsic::fma:
case Intrinsic::fmuladd:		case Intrinsic::fmuladd:
case Intrinsic::is_fpclass:		case Intrinsic::is_fpclass:
case Intrinsic::powi:		case Intrinsic::powi:
case Intrinsic::canonicalize:		case Intrinsic::canonicalize:
case Intrinsic::fptosi_sat:		case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat:		case Intrinsic::fptoui_sat:
		case Intrinsic::prefetch:
return true;		return true;
default:		default:
return false;		return false;
}		}
}		}

/// Identifies if the vector form of the intrinsic has a scalar operand.		/// Identifies if the vector form of the intrinsic has a scalar operand.
bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,		bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
▲ Show 20 Lines • Show All 1,413 Lines • Show Last 20 Lines

llvm/lib/IR/IRBuilder.cpp

Show First 20 Lines • Show All 596 Lines • ▼ Show 20 Lines	CallInst IRBuilderBase::CreateMaskedStore(Value Val, Value *Ptr,
Type *DataTy = Val->getType();		Type *DataTy = Val->getType();
assert(DataTy->isVectorTy() && "Val should be a vector");		assert(DataTy->isVectorTy() && "Val should be a vector");
assert(Mask && "Mask should not be all-ones (null)");		assert(Mask && "Mask should not be all-ones (null)");
Type *OverloadedTypes[] = { DataTy, PtrTy };		Type *OverloadedTypes[] = { DataTy, PtrTy };
Value *Ops[] = {Val, Ptr, getInt32(Alignment.value()), Mask};		Value *Ops[] = {Val, Ptr, getInt32(Alignment.value()), Mask};
return CreateMaskedIntrinsic(Intrinsic::masked_store, Ops, OverloadedTypes);		return CreateMaskedIntrinsic(Intrinsic::masked_store, Ops, OverloadedTypes);
}		}

		/// Create a call to a Masked Prefetch intrinsic.
		/// \p Ty        - vector type of the data to prefetch
		/// \p Ptr       - base pointer for the prefetch
		/// \p Alignment - alignment of the prefetched location
		/// \p Mask      - vector of booleans which indicates what vector lanes should
		///                be accessed in memory; must not be null
		/// \p RW        - Read or Write; must not be null
		/// \p Locality  - Cache Level; must not be null
		/// \p Name      - name passed through to the created call
		CallInst *IRBuilderBase::CreateMaskedPrefetch(Type *Ty, Value *Ptr,
		                                              Align Alignment,
		                                              Value *Mask, Value *RW,
		                                              Value *Locality,
		                                              const Twine &Name) {
		  auto *PtrTy = cast<PointerType>(Ptr->getType());
		  assert(Ty->isVectorTy() && "Type should be vector");
		  assert(PtrTy->isOpaqueOrPointeeTypeMatches(Ty) && "Wrong element type");
		  assert(Mask && "Mask should not be all-ones (null)");
		  // RW and Locality go straight into the operand list below, so a null
		  // value would produce a call with a null operand.
		  assert(RW && Locality && "RW and Locality operands must be provided");
		  Type *OverloadedTypes[] = {PtrTy, Mask->getType()};
		  Value *Ops[] = {Ptr, getInt32(Alignment.value()), RW, Locality, Mask};
		  return CreateMaskedIntrinsic(Intrinsic::masked_prefetch, Ops,
		                               OverloadedTypes, Name);
		}

/// Create a call to a Masked intrinsic, with given intrinsic Id,		/// Create a call to a Masked intrinsic, with given intrinsic Id,
/// an array of operands - Ops, and an array of overloaded types -		/// an array of operands - Ops, and an array of overloaded types -
/// OverloadedTypes.		/// OverloadedTypes.
CallInst *IRBuilderBase::CreateMaskedIntrinsic(Intrinsic::ID Id,		CallInst *IRBuilderBase::CreateMaskedIntrinsic(Intrinsic::ID Id,
ArrayRef<Value *> Ops,		ArrayRef<Value *> Ops,
ArrayRef<Type *> OverloadedTypes,		ArrayRef<Type *> OverloadedTypes,
const Twine &Name) {		const Twine &Name) {
Module *M = BB->getParent()->getParent();		Module *M = BB->getParent()->getParent();
▲ Show 20 Lines • Show All 90 Lines • ▼ Show 20 Lines	CallInst IRBuilderBase::CreateMaskedCompressStore(Value Val, Value *Ptr,
assert(DataTy->isVectorTy() && "Val should be a vector");		assert(DataTy->isVectorTy() && "Val should be a vector");
assert(Mask && "Mask should not be all-ones (null)");		assert(Mask && "Mask should not be all-ones (null)");
Type *OverloadedTypes[] = {DataTy};		Type *OverloadedTypes[] = {DataTy};
Value *Ops[] = {Val, Ptr, Mask};		Value *Ops[] = {Val, Ptr, Mask};
return CreateMaskedIntrinsic(Intrinsic::masked_compressstore, Ops,		return CreateMaskedIntrinsic(Intrinsic::masked_compressstore, Ops,
OverloadedTypes);		OverloadedTypes);
}		}

		/// Create a call to a Masked Gather Prefetch intrinsic.
		/// \p Ty        - vector type of the data to prefetch
		/// \p Ptrs      - vector of pointers to prefetch from
		/// \p Alignment - alignment of the prefetched locations
		/// \p Mask      - vector of booleans which indicates what vector lanes should
		///                be accessed in memory; a null mask is replaced by an
		///                all-ones mask
		/// \p RW        - Read or Write
		/// \p Locality  - Cache Level
		/// \p Name      - name forwarded to the created call
		CallInst *IRBuilderBase::CreateMaskedGatherPrefetch(Type *Ty, Value *Ptrs,
		                                                    Align Alignment,
		                                                    Value *Mask, Value *RW,
		                                                    Value *Locality,
		                                                    const Twine &Name) {
		  auto *VecTy = cast<VectorType>(Ty);
		  ElementCount NumElts = VecTy->getElementCount();
		  auto *PtrsTy = cast<VectorType>(Ptrs->getType());
		  assert(cast<PointerType>(PtrsTy->getElementType())
		             ->isOpaqueOrPointeeTypeMatches(VecTy->getElementType()) &&
		         "Element type mismatch");
		  assert(NumElts == PtrsTy->getElementCount() && "Element count mismatch");

		  // No mask supplied: every lane is active.
		  if (!Mask)
		    Mask = Constant::getAllOnesValue(
		        VectorType::get(Type::getInt1Ty(Context), NumElts));

		  Type *OverloadedTypes[] = {PtrsTy};
		  // Operand order: pointers, alignment (as i32), RW, locality, mask.
		  Value *Ops[] = {Ptrs, getInt32(Alignment.value()), RW, Locality, Mask};

		  // We specify only one type when we create this intrinsic. Types of other
		  // arguments are derived from this type.
		  return CreateMaskedIntrinsic(Intrinsic::masked_gather_prefetch, Ops,
		                               OverloadedTypes, Name);
		}

template <typename T0>		template <typename T0>
static std::vector<Value *>		static std::vector<Value *>
getStatepointArgs(IRBuilderBase &B, uint64_t ID, uint32_t NumPatchBytes,		getStatepointArgs(IRBuilderBase &B, uint64_t ID, uint32_t NumPatchBytes,
Value *ActualCallee, uint32_t Flags, ArrayRef<T0> CallArgs) {		Value *ActualCallee, uint32_t Flags, ArrayRef<T0> CallArgs) {
std::vector<Value *> Args;		std::vector<Value *> Args;
Args.push_back(B.getInt64(ID));		Args.push_back(B.getInt64(ID));
Args.push_back(B.getInt32(NumPatchBytes));		Args.push_back(B.getInt32(NumPatchBytes));
Args.push_back(ActualCallee);		Args.push_back(ActualCallee);
▲ Show 20 Lines • Show All 653 Lines • Show Last 20 Lines

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Show First 20 Lines • Show All 594 Lines • ▼ Show 20 Lines	return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
const SCEV *IthLaneExpr =		const SCEV *IthLaneExpr =
SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);		SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
return FirstLaneExpr == IthLaneExpr;		return FirstLaneExpr == IthLaneExpr;
});		});
}		}

bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,		bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
ElementCount VF) const {		ElementCount VF) const {
Value *Ptr = getLoadStorePointerOperand(&I);		Value *Ptr = getLdStPfPointerOperand(&I);
if (!Ptr)		if (!Ptr)
return false;		return false;
// Note: There's nothing inherent which prevents predicated loads and		// Note: There's nothing inherent which prevents predicated loads and
// stores from being uniform. The current lowering simply doesn't handle		// stores from being uniform. The current lowering simply doesn't handle
// it; in particular, the cost model distinguishes scatter/gather from		// it; in particular, the cost model distinguishes scatter/gather from
// scalar w/predication, and we currently rely on the scalar path.		// scalar w/predication, and we currently rely on the scalar path.
return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());		return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
}		}
▲ Show 20 Lines • Show All 968 Lines • Show Last 20 Lines

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,464 Lines • ▼ Show 20 Lines	public:

/// Returns true if the target machine supports masked load operation		/// Returns true if the target machine supports masked load operation
/// for the given \p DataType and kind of access to \p Ptr.		/// for the given \p DataType and kind of access to \p Ptr.
bool isLegalMaskedLoad(Type DataType, Value Ptr, Align Alignment) const {		bool isLegalMaskedLoad(Type DataType, Value Ptr, Align Alignment) const {
return Legal->isConsecutivePtr(DataType, Ptr) &&		return Legal->isConsecutivePtr(DataType, Ptr) &&
TTI.isLegalMaskedLoad(DataType, Alignment);		TTI.isLegalMaskedLoad(DataType, Alignment);
}		}

		/// Returns true if the target machine supports masked prefetch operation
		/// for the given \p DataType and kind of access to \p Ptr.
		bool isLegalMaskedPrefetch(Type DataType, Value Ptr, Align Alignment) const {
		return Legal->isConsecutivePtr(DataType, Ptr) &&
		TTI.isLegalMaskedPrefetch(DataType, Alignment);
		}

/// Returns true if the target machine can represent \p V as a masked gather		/// Returns true if the target machine can represent \p V as a masked gather
/// or scatter operation.		/// or scatter operation.
bool isLegalGatherOrScatter(Value *V, ElementCount VF) {		bool isLegalGatherOrScatter(Value *V, ElementCount VF) {
bool LI = isa<LoadInst>(V);		bool LI = isa<LoadInst>(V);
bool SI = isa<StoreInst>(V);		bool SI = isa<StoreInst>(V);
if (!LI && !SI)		bool PF = isa<PrefetchInst>(V);
		if (!LI && !SI && !PF)
return false;		return false;
auto *Ty = getLoadStoreType(V);		auto *Ty = getLdStPfType(V);
Align Align = getLoadStoreAlignment(V);		Align Align = getLdStPfAlignment(V);
if (VF.isVector())		if (VF.isVector())
Ty = VectorType::get(Ty, VF);		Ty = VectorType::get(Ty, VF);
return (LI && TTI.isLegalMaskedGather(Ty, Align)) \|\|		return (LI && TTI.isLegalMaskedGather(Ty, Align)) \|\|
(SI && TTI.isLegalMaskedScatter(Ty, Align));		(SI && TTI.isLegalMaskedScatter(Ty, Align)) \|\|
		(PF && TTI.isLegalMaskedPrefetch(Ty, Align));
}		}

/// Returns true if the target machine supports all of the reduction		/// Returns true if the target machine supports all of the reduction
/// variables found for the given VF.		/// variables found for the given VF.
bool canVectorizeReductions(ElementCount VF) const {		bool canVectorizeReductions(ElementCount VF) const {
return (all_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {		return (all_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {
const RecurrenceDescriptor &RdxDesc = Reduction.second;		const RecurrenceDescriptor &RdxDesc = Reduction.second;
return TTI.isLegalToVectorizeReduction(RdxDesc, VF);		return TTI.isLegalToVectorizeReduction(RdxDesc, VF);
▲ Show 20 Lines • Show All 2,902 Lines • ▼ Show 20 Lines	bool LoopVectorizationCostModel::isScalarWithPredication(
if (!isPredicatedInst(I))		if (!isPredicatedInst(I))
return false;		return false;

// Do we have a non-scalar lowering for this predicated		// Do we have a non-scalar lowering for this predicated
// instruction? No - it is scalar with predication.		// instruction? No - it is scalar with predication.
switch(I->getOpcode()) {		switch(I->getOpcode()) {
default:		default:
return true;		return true;
case Instruction::Call:		case Instruction::Call: {
		if (!isa<PrefetchInst>(I))
return !VFDatabase::hasMaskedVariant(*(cast<CallInst>(I)), VF);		return !VFDatabase::hasMaskedVariant(*(cast<CallInst>(I)), VF);
		auto *Ptr = getPrefetchPointerOperand(I);
		auto *Ty = getPrefetchType(I);
		Type *VTy = Ty;
		if (VF.isVector())
		VTy = VectorType::get(Ty, VF);
		const Align Alignment = getPrefetchAlignment(I);
		return !(isLegalMaskedPrefetch(Ty, Ptr, Alignment) \|\|
		TTI.isLegalMaskedGatherPrefetch(VTy, Alignment));
		}
case Instruction::Load:		case Instruction::Load:
case Instruction::Store: {		case Instruction::Store: {
auto *Ptr = getLoadStorePointerOperand(I);		auto *Ptr = getLoadStorePointerOperand(I);
auto *Ty = getLoadStoreType(I);		auto *Ty = getLoadStoreType(I);
Type *VTy = Ty;		Type *VTy = Ty;
if (VF.isVector())		if (VF.isVector())
VTy = VectorType::get(Ty, VF);		VTy = VectorType::get(Ty, VF);
const Align Alignment = getLoadStoreAlignment(I);		const Align Alignment = getLoadStoreAlignment(I);
▲ Show 20 Lines • Show All 190 Lines • ▼ Show 20 Lines	bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
const Align Alignment = getLoadStoreAlignment(I);		const Align Alignment = getLoadStoreAlignment(I);
return isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty, Alignment)		return isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty, Alignment)
: TTI.isLegalMaskedStore(Ty, Alignment);		: TTI.isLegalMaskedStore(Ty, Alignment);
}		}

bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(		bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
Instruction *I, ElementCount VF) {		Instruction *I, ElementCount VF) {
// Get and ensure we have a valid memory instruction.		// Get and ensure we have a valid memory instruction.
assert((isa<LoadInst, StoreInst>(I)) && "Invalid memory instruction");		assert((isa<LoadInst, StoreInst, PrefetchInst>(I)) && "Invalid memory instruction");

auto *Ptr = getLoadStorePointerOperand(I);		auto *Ptr = getLdStPfPointerOperand(I);
auto *ScalarTy = getLoadStoreType(I);		auto *ScalarTy = getLdStPfType(I);

// In order to be widened, the pointer should be consecutive, first of all.		// In order to be widened, the pointer should be consecutive, first of all.
if (!Legal->isConsecutivePtr(ScalarTy, Ptr))		if (!Legal->isConsecutivePtr(ScalarTy, Ptr))
return false;		return false;

// If the instruction is a store located in a predicated block, it will be		// If the instruction is a store located in a predicated block, it will be
// scalarized.		// scalarized.
if (isScalarWithPredication(I, VF))		if (isScalarWithPredication(I, VF))
▲ Show 20 Lines • Show All 1,840 Lines • ▼ Show 20 Lines
InstructionCost		InstructionCost
LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,		LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
ElementCount VF) {		ElementCount VF) {
assert(VF.isVector() &&		assert(VF.isVector() &&
"Scalarization cost of instruction implies vectorization.");		"Scalarization cost of instruction implies vectorization.");
if (VF.isScalable())		if (VF.isScalable())
return InstructionCost::getInvalid();		return InstructionCost::getInvalid();

Type *ValTy = getLoadStoreType(I);		Type *ValTy = getLdStPfType(I);
auto SE = PSE.getSE();		auto SE = PSE.getSE();

unsigned AS = getLoadStoreAddressSpace(I);		unsigned AS = getLdStPfAddressSpace(I);
Value *Ptr = getLoadStorePointerOperand(I);		Value *Ptr = getLdStPfPointerOperand(I);
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);		Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
// NOTE: PtrTy is a vector to signal `TTI::getAddressComputationCost`		// NOTE: PtrTy is a vector to signal `TTI::getAddressComputationCost`
// that it is being called from this specific place.		// that it is being called from this specific place.

// Figure out whether the access is strided and get the stride value		// Figure out whether the access is strided and get the stride value
// if it's known in compile time		// if it's known in compile time
const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, PSE, TheLoop);		const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, PSE, TheLoop);

// Get the cost of the scalar memory instruction and address computation.		// Get the cost of the scalar memory instruction and address computation.
InstructionCost Cost =		InstructionCost Cost =
VF.getKnownMinValue() * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);		VF.getKnownMinValue() * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);

// Don't pass *I here, since it is scalar but will actually be part of a		// Don't pass *I here, since it is scalar but will actually be part of a
// vectorized loop where the user of it is a vectorized instruction.		// vectorized loop where the user of it is a vectorized instruction.
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;		TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
const Align Alignment = getLoadStoreAlignment(I);		const Align Alignment = getLdStPfAlignment(I);
Cost += VF.getKnownMinValue() * TTI.getMemoryOpCost(I->getOpcode(),		Cost += VF.getKnownMinValue() * TTI.getMemoryOpCost(I->getOpcode(),
ValTy->getScalarType(),		ValTy->getScalarType(),
Alignment, AS, CostKind);		Alignment, AS, CostKind);

// Get the overhead of the extractelement and insertelement instructions		// Get the overhead of the extractelement and insertelement instructions
// we might create due to scalarization.		// we might create due to scalarization.
Cost += getScalarizationOverhead(I, VF, CostKind);		Cost += getScalarizationOverhead(I, VF, CostKind);

Show All 18 Lines	LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
}		}

return Cost;		return Cost;
}		}

InstructionCost		InstructionCost
LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,		LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
ElementCount VF) {		ElementCount VF) {
Type *ValTy = getLoadStoreType(I);		Type *ValTy = getLdStPfType(I);
auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));		auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
Value *Ptr = getLoadStorePointerOperand(I);		Value *Ptr = getLdStPfPointerOperand(I);
unsigned AS = getLoadStoreAddressSpace(I);		unsigned AS = getLdStPfAddressSpace(I);
int ConsecutiveStride = Legal->isConsecutivePtr(ValTy, Ptr);		int ConsecutiveStride = Legal->isConsecutivePtr(ValTy, Ptr);
enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;		enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;

assert((ConsecutiveStride == 1 \|\| ConsecutiveStride == -1) &&		assert((ConsecutiveStride == 1 \|\| ConsecutiveStride == -1) &&
"Stride should be 1 or -1 for consecutive memory access");		"Stride should be 1 or -1 for consecutive memory access");
const Align Alignment = getLoadStoreAlignment(I);		const Align Alignment = getLdStPfAlignment(I);
InstructionCost Cost = 0;		InstructionCost Cost = 0;
if (Legal->isMaskRequired(I)) {		if (Legal->isMaskRequired(I)) {
Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,		Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
CostKind);		CostKind);
} else {		} else {
TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(0));		TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(0));
Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,		Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
CostKind, OpInfo, I);		CostKind, OpInfo, I);
}		}

bool Reverse = ConsecutiveStride < 0;		bool Reverse = ConsecutiveStride < 0;
if (Reverse)		if (Reverse)
Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy,		Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy,
std::nullopt, CostKind, 0);		std::nullopt, CostKind, 0);
return Cost;		return Cost;
}		}

InstructionCost		InstructionCost
LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,		LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
ElementCount VF) {		ElementCount VF) {
assert(Legal->isUniformMemOp(*I, VF));		assert(Legal->isUniformMemOp(*I, VF));

Type *ValTy = getLoadStoreType(I);		Type *ValTy = getLdStPfType(I);
auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));		auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
const Align Alignment = getLoadStoreAlignment(I);		const Align Alignment = getLdStPfAlignment(I);
unsigned AS = getLoadStoreAddressSpace(I);		unsigned AS = getLdStPfAddressSpace(I);
enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;		enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
		if (isa<PrefetchInst>(I)) {
		return TTI.getAddressComputationCost(ValTy) +
		TTI.getMemoryOpCost(Instruction::Call, ValTy, Alignment, AS,
		CostKind);
		}
if (isa<LoadInst>(I)) {		if (isa<LoadInst>(I)) {
return TTI.getAddressComputationCost(ValTy) +		return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS,		TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS,
CostKind) +		CostKind) +
TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy);		TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy);
}		}
StoreInst *SI = cast<StoreInst>(I);		StoreInst *SI = cast<StoreInst>(I);

bool isLoopInvariantStoreValue = Legal->isInvariant(SI->getValueOperand());		bool isLoopInvariantStoreValue = Legal->isInvariant(SI->getValueOperand());
return TTI.getAddressComputationCost(ValTy) +		return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,		TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
CostKind) +		CostKind) +
(isLoopInvariantStoreValue		(isLoopInvariantStoreValue
? 0		? 0
: TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,		: TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
CostKind, VF.getKnownMinValue() - 1));		CostKind, VF.getKnownMinValue() - 1));
}		}

InstructionCost		InstructionCost
LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,		LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
ElementCount VF) {		ElementCount VF) {
Type *ValTy = getLoadStoreType(I);		Type *ValTy = getLdStPfType(I);
auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));		auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
const Align Alignment = getLoadStoreAlignment(I);		const Align Alignment = getLdStPfAlignment(I);
const Value *Ptr = getLoadStorePointerOperand(I);		const Value *Ptr = getLdStPfPointerOperand(I);

return TTI.getAddressComputationCost(VectorTy) +		return TTI.getAddressComputationCost(VectorTy) +
TTI.getGatherScatterOpCost(		TTI.getGatherScatterOpCost(
I->getOpcode(), VectorTy, Ptr, Legal->isMaskRequired(I), Alignment,		I->getOpcode(), VectorTy, Ptr, Legal->isMaskRequired(I), Alignment,
TargetTransformInfo::TCK_RecipThroughput, I);		TargetTransformInfo::TCK_RecipThroughput, I);
}		}

InstructionCost		InstructionCost
▲ Show 20 Lines • Show All 209 Lines • ▼ Show 20 Lines
}		}

InstructionCost		InstructionCost
LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,		LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
ElementCount VF) {		ElementCount VF) {
// Calculate scalar cost only. Vectorization cost should be ready at this		// Calculate scalar cost only. Vectorization cost should be ready at this
// moment.		// moment.
if (VF.isScalar()) {		if (VF.isScalar()) {
Type *ValTy = getLoadStoreType(I);		Type *ValTy = getLdStPfType(I);
const Align Alignment = getLoadStoreAlignment(I);		const Align Alignment = getLdStPfAlignment(I);
unsigned AS = getLoadStoreAddressSpace(I);		unsigned AS = getLdStPfAddressSpace(I);

TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(0));		TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(0));
return TTI.getAddressComputationCost(ValTy) +		return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS,		TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS,
TTI::TCK_RecipThroughput, OpInfo, I);		TTI::TCK_RecipThroughput, OpInfo, I);
}		}
return getWideningCost(I, VF);		return getWideningCost(I, VF);
}		}
▲ Show 20 Lines • Show All 84 Lines • ▼ Show 20 Lines

void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {		void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
if (VF.isScalar())		if (VF.isScalar())
return;		return;
NumPredStores = 0;		NumPredStores = 0;
for (BasicBlock *BB : TheLoop->blocks()) {		for (BasicBlock *BB : TheLoop->blocks()) {
// For each instruction in the old loop.		// For each instruction in the old loop.
for (Instruction &I : *BB) {		for (Instruction &I : *BB) {
Value *Ptr = getLoadStorePointerOperand(&I);		Value *Ptr = getLdStPfPointerOperand(&I);
if (!Ptr)		if (!Ptr)
continue;		continue;

// TODO: We should generate better code and update the cost model for		// TODO: We should generate better code and update the cost model for
// predicated uniform stores. Today they are treated as any other		// predicated uniform stores. Today they are treated as any other
// predicated store (see added test cases in		// predicated store (see added test cases in
// invariant-store-vectorization.ll).		// invariant-store-vectorization.ll).
if (isa<StoreInst>(&I) && isScalarWithPredication(&I, VF))		if (isa<StoreInst>(&I) && isScalarWithPredication(&I, VF))
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines	for (Instruction &I : *BB) {
setWideningDecision(&I, VF, CM_Scalarize, ScalarizationCost);		setWideningDecision(&I, VF, CM_Scalarize, ScalarizationCost);
continue;		continue;
}		}

// We assume that widening is the best solution when possible.		// We assume that widening is the best solution when possible.
if (memoryInstructionCanBeWidened(&I, VF)) {		if (memoryInstructionCanBeWidened(&I, VF)) {
InstructionCost Cost = getConsecutiveMemOpCost(&I, VF);		InstructionCost Cost = getConsecutiveMemOpCost(&I, VF);
int ConsecutiveStride = Legal->isConsecutivePtr(		int ConsecutiveStride = Legal->isConsecutivePtr(
getLoadStoreType(&I), getLoadStorePointerOperand(&I));		getLdStPfType(&I), getLdStPfPointerOperand(&I));
assert((ConsecutiveStride == 1 \|\| ConsecutiveStride == -1) &&		assert((ConsecutiveStride == 1 \|\| ConsecutiveStride == -1) &&
"Expected consecutive stride.");		"Expected consecutive stride.");
InstWidening Decision =		InstWidening Decision =
ConsecutiveStride == 1 ? CM_Widen : CM_Widen_Reverse;		ConsecutiveStride == 1 ? CM_Widen : CM_Widen_Reverse;
setWideningDecision(&I, VF, Decision, Cost);		setWideningDecision(&I, VF, Decision, Cost);
continue;		continue;
}		}

▲ Show 20 Lines • Show All 417 Lines • ▼ Show 20 Lines	if (canTruncateToMinimalBitwidth(I, VF)) {
VectorTy =		VectorTy =
smallestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy);		smallestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy);
}		}
}		}

return TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I);		return TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I);
}		}
case Instruction::Call: {		case Instruction::Call: {
		if (isa<PrefetchInst>(I)) {
		ElementCount Width = VF;
		if (Width.isVector()) {
		InstWidening Decision = getWideningDecision(I, Width);
		assert(Decision != CM_Unknown &&
		"CM decision should be taken at this point");
		if (getWideningCost(I, VF) == InstructionCost::getInvalid())
		return InstructionCost::getInvalid();
		if (Decision == CM_Scalarize)
		Width = ElementCount::getFixed(1);
		}
		VectorTy = ToVectorTy(getLdStPfType(I), Width);
		return getMemoryInstructionCost(I, VF);
		}
if (RecurrenceDescriptor::isFMulAddIntrinsic(I))		if (RecurrenceDescriptor::isFMulAddIntrinsic(I))
if (auto RedCost = getReductionPatternCost(I, VF, VectorTy, CostKind))		if (auto RedCost = getReductionPatternCost(I, VF, VectorTy, CostKind))
return *RedCost;		return *RedCost;
Function *Variant;		Function *Variant;
CallInst *CI = cast<CallInst>(I);		CallInst *CI = cast<CallInst>(I);
InstructionCost CallCost = getVectorCallCost(CI, VF, &Variant);		InstructionCost CallCost = getVectorCallCost(CI, VF, &Variant);
if (getVectorIntrinsicIDForCall(CI, TLI)) {		if (getVectorIntrinsicIDForCall(CI, TLI)) {
InstructionCost IntrinsicCost = getVectorIntrinsicCost(CI, VF);		InstructionCost IntrinsicCost = getVectorIntrinsicCost(CI, VF);
▲ Show 20 Lines • Show All 816 Lines • ▼ Show 20 Lines	VPValue VPRecipeBuilder::createBlockInMask(BasicBlock BB, VPlan &Plan) {

return BlockMaskCache[BB] = BlockMask;		return BlockMaskCache[BB] = BlockMask;
}		}

VPRecipeBase VPRecipeBuilder::tryToWidenMemory(Instruction I,		VPRecipeBase VPRecipeBuilder::tryToWidenMemory(Instruction I,
ArrayRef<VPValue *> Operands,		ArrayRef<VPValue *> Operands,
VFRange &Range,		VFRange &Range,
VPlanPtr &Plan) {		VPlanPtr &Plan) {
assert((isa<LoadInst>(I) \|\| isa<StoreInst>(I)) &&		assert((isa<LoadInst>(I) \|\| isa<StoreInst>(I) \|\| isa<PrefetchInst>(I)) &&
"Must be called with either a load or store");		"Must be called with either a load or store");

auto willWiden = [&](ElementCount VF) -> bool {		auto willWiden = [&](ElementCount VF) -> bool {
LoopVectorizationCostModel::InstWidening Decision =		LoopVectorizationCostModel::InstWidening Decision =
CM.getWideningDecision(I, VF);		CM.getWideningDecision(I, VF);
assert(Decision != LoopVectorizationCostModel::CM_Unknown &&		assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
"CM decision should be taken at this point.");		"CM decision should be taken at this point.");
if (Decision == LoopVectorizationCostModel::CM_Interleave)		if (Decision == LoopVectorizationCostModel::CM_Interleave)
Show All 18 Lines	VPRecipeBase VPRecipeBuilder::tryToWidenMemory(Instruction I,
bool Reverse = Decision == LoopVectorizationCostModel::CM_Widen_Reverse;		bool Reverse = Decision == LoopVectorizationCostModel::CM_Widen_Reverse;
bool Consecutive =		bool Consecutive =
Reverse \|\| Decision == LoopVectorizationCostModel::CM_Widen;		Reverse \|\| Decision == LoopVectorizationCostModel::CM_Widen;

if (LoadInst *Load = dyn_cast<LoadInst>(I))		if (LoadInst *Load = dyn_cast<LoadInst>(I))
return new VPWidenMemoryInstructionRecipe(*Load, Operands[0], Mask,		return new VPWidenMemoryInstructionRecipe(*Load, Operands[0], Mask,
Consecutive, Reverse);		Consecutive, Reverse);

		if (PrefetchInst *Prefetch = dyn_cast<PrefetchInst>(I))
		return new VPWidenMemoryInstructionRecipe(*Prefetch, Operands[0], Mask,
		Consecutive, Reverse);

StoreInst *Store = cast<StoreInst>(I);		StoreInst *Store = cast<StoreInst>(I);
return new VPWidenMemoryInstructionRecipe(*Store, Operands[1], Operands[0],		return new VPWidenMemoryInstructionRecipe(*Store, Operands[1], Operands[0],
Mask, Consecutive, Reverse);		Mask, Consecutive, Reverse);
}		}

/// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also		/// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
/// insert a recipe to expand the step for the induction recipe.		/// insert a recipe to expand the step for the induction recipe.
static VPWidenIntOrFpInductionRecipe *		static VPWidenIntOrFpInductionRecipe *
▲ Show 20 Lines • Show All 403 Lines • ▼ Show 20 Lines	if (isa<TruncInst>(Instr) &&
Range, *Plan)))		Range, *Plan)))
return toVPRecipeResult(Recipe);		return toVPRecipeResult(Recipe);

// All widen recipes below deal only with VF > 1.		// All widen recipes below deal only with VF > 1.
if (LoopVectorizationPlanner::getDecisionAndClampRange(		if (LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) { return VF.isScalar(); }, Range))		[&](ElementCount VF) { return VF.isScalar(); }, Range))
return nullptr;		return nullptr;

if (auto *CI = dyn_cast<CallInst>(Instr))		if (isa<CallInst>(Instr) && !isa<PrefetchInst>(Instr)) {
		auto *CI = dyn_cast<CallInst>(Instr);
return toVPRecipeResult(tryToWidenCall(CI, Operands, Range, Plan));		return toVPRecipeResult(tryToWidenCall(CI, Operands, Range, Plan));
		}

if (isa<LoadInst>(Instr) \|\| isa<StoreInst>(Instr))		if (isa<LoadInst>(Instr) \|\| isa<StoreInst>(Instr) \|\| isa<PrefetchInst>(Instr))
return toVPRecipeResult(tryToWidenMemory(Instr, Operands, Range, Plan));		return toVPRecipeResult(tryToWidenMemory(Instr, Operands, Range, Plan));

if (!shouldWiden(Instr, Range))		if (!shouldWiden(Instr, Range))
return nullptr;		return nullptr;

if (auto GEP = dyn_cast<GetElementPtrInst>(Instr))		if (auto GEP = dyn_cast<GetElementPtrInst>(Instr))
return toVPRecipeResult(new VPWidenGEPRecipe(		return toVPRecipeResult(new VPWidenGEPRecipe(
GEP, make_range(Operands.begin(), Operands.end())));		GEP, make_range(Operands.begin(), Operands.end())));
▲ Show 20 Lines • Show All 912 Lines • ▼ Show 20 Lines	if (State.VF.isVector() && shouldPack()) {
State.ILV->packScalarIntoVectorValue(this, *State.Instance, State);		State.ILV->packScalarIntoVectorValue(this, *State.Instance, State);
}		}
return;		return;
}		}

if (IsUniform) {		if (IsUniform) {
// If the recipe is uniform across all parts (instead of just per VF), only		// If the recipe is uniform across all parts (instead of just per VF), only
// generate a single instance.		// generate a single instance.
if ((isa<LoadInst>(UI) \|\| isa<StoreInst>(UI)) &&		if ((isa<LoadInst>(UI) \|\| isa<StoreInst>(UI) \|\| isa<PrefetchInst>(UI)) &&
all_of(operands(), [](VPValue *Op) {		all_of(operands(), [](VPValue *Op) {
return Op->isDefinedOutsideVectorRegions();		return Op->isDefinedOutsideVectorRegions();
})) {		})) {
State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), State);		State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), State);
if (user_begin() != user_end()) {		if (user_begin() != user_end()) {
for (unsigned Part = 1; Part < State.UF; ++Part)		for (unsigned Part = 1; Part < State.UF; ++Part)
State.set(this, State.get(this, VPIteration(0, 0)),		State.set(this, State.get(this, VPIteration(0, 0)),
VPIteration(Part, 0));		VPIteration(Part, 0));
Show All 13 Lines	void VPReplicateRecipe::execute(VPTransformState &State) {
if (isa<StoreInst>(UI) &&		if (isa<StoreInst>(UI) &&
vputils::isUniformAfterVectorization(getOperand(1))) {		vputils::isUniformAfterVectorization(getOperand(1))) {
auto Lane = VPLane::getLastLaneForVF(State.VF);		auto Lane = VPLane::getLastLaneForVF(State.VF);
State.ILV->scalarizeInstruction(UI, this, VPIteration(State.UF - 1, Lane),		State.ILV->scalarizeInstruction(UI, this, VPIteration(State.UF - 1, Lane),
State);		State);
return;		return;
}		}

		// A prefetch whose address operand is uniform after vectorization only
		// needs the last copy of the prefetch.
		if (isa<PrefetchInst>(UI) &&
		vputils::isUniformAfterVectorization(getOperand(0))) {
		auto Lane = VPLane::getLastLaneForVF(State.VF);
		State.ILV->scalarizeInstruction(UI, this, VPIteration(State.UF - 1, Lane),
		State);
		return;
		}

// Generate scalar instances for all VF lanes of all UF parts.		// Generate scalar instances for all VF lanes of all UF parts.
assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");		assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
const unsigned EndLane = State.VF.getKnownMinValue();		const unsigned EndLane = State.VF.getKnownMinValue();
for (unsigned Part = 0; Part < State.UF; ++Part)		for (unsigned Part = 0; Part < State.UF; ++Part)
for (unsigned Lane = 0; Lane < EndLane; ++Lane)		for (unsigned Lane = 0; Lane < EndLane; ++Lane)
State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, Lane), State);		State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, Lane), State);
}		}

void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {		void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;		VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;

// Attempt to issue a wide load.		// Attempt to issue a wide load.
LoadInst *LI = dyn_cast<LoadInst>(&Ingredient);		LoadInst *LI = dyn_cast<LoadInst>(&Ingredient);
StoreInst *SI = dyn_cast<StoreInst>(&Ingredient);		StoreInst *SI = dyn_cast<StoreInst>(&Ingredient);
		PrefetchInst *PF = dyn_cast<PrefetchInst>(&Ingredient);

assert((LI \|\| SI) && "Invalid Load/Store instruction");		assert((LI \|\| SI \|\| PF) && "Invalid Load/Store/Prefetch instruction");
assert((!SI \|\| StoredValue) && "No stored value provided for widened store");		assert((!SI \|\| StoredValue) && "No stored value provided for widened store");
assert((!LI \|\| !StoredValue) && "Stored value provided for widened load");		assert((!LI \|\| !StoredValue) && "Stored value provided for widened load");
		assert((!PF \|\| !StoredValue) && "Stored value provided for widened prefetch");

Type *ScalarDataTy = getLoadStoreType(&Ingredient);		Type *ScalarDataTy = getLdStPfType(&Ingredient);

auto *DataTy = VectorType::get(ScalarDataTy, State.VF);		auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
const Align Alignment = getLoadStoreAlignment(&Ingredient);		const Align Alignment = getLdStPfAlignment(&Ingredient);
bool CreateGatherScatter = !isConsecutive();		bool CreateGatherScatter = !isConsecutive();

auto &Builder = State.Builder;		auto &Builder = State.Builder;
InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);		InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
bool isMaskRequired = getMask();		bool isMaskRequired = getMask();
if (isMaskRequired)		if (isMaskRequired)
for (unsigned Part = 0; Part < State.UF; ++Part)		for (unsigned Part = 0; Part < State.UF; ++Part)
BlockInMaskParts[Part] = State.get(getMask(), Part);		BlockInMaskParts[Part] = State.get(getMask(), Part);
▲ Show 20 Lines • Show All 67 Lines • ▼ Show 20 Lines	for (unsigned Part = 0; Part < State.UF; ++Part) {
else		else
NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);		NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
}		}
State.addMetadata(NewSI, SI);		State.addMetadata(NewSI, SI);
}		}
return;		return;
}		}

		if (PF) {
		State.setDebugLocFromInst(PF);
		Value *RW = PF->getArgOperand(1);
		Value *Locality = PF->getArgOperand(2);

		for (unsigned Part = 0; Part < State.UF; ++Part) {
		Instruction *NewPF = nullptr;
		if (CreateGatherScatter) {
		Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
		Value *VectorGep = State.get(getAddr(), Part);
		NewPF = Builder.CreateMaskedGatherPrefetch(DataTy, VectorGep, Alignment,
		MaskPart, RW, Locality);
		} else {
		auto *VecPtr =
		CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
		if (isMaskRequired)
		NewPF = Builder.CreateMaskedPrefetch(DataTy, VecPtr, Alignment,
		BlockInMaskParts[Part],
		RW, Locality);
		else {
		auto *MaskPart = Constant::getAllOnesValue(
		VectorType::get(Type::getInt1Ty(DataTy->getContext()), DataTy));
		NewPF = Builder.CreateMaskedPrefetch(DataTy, VecPtr, Alignment,
		MaskPart, RW, Locality);
		}
		}
		State.addMetadata(NewPF, PF);
		}
		return;
		}

// Handle loads.		// Handle loads.
assert(LI && "Must have a load instruction");		assert(LI && "Must have a load instruction");
State.setDebugLocFromInst(LI);		State.setDebugLocFromInst(LI);
for (unsigned Part = 0; Part < State.UF; ++Part) {		for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *NewLI;		Value *NewLI;
if (CreateGatherScatter) {		if (CreateGatherScatter) {
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;		Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
Value *VectorGep = State.get(getAddr(), Part);		Value *VectorGep = State.get(getAddr(), Part);
▲ Show 20 Lines • Show All 950 Lines • Show Last 20 Lines

llvm/lib/Transforms/Vectorize/VPlan.h

Show First 20 Lines • Show All 1,895 Lines • ▼ Show 20 Lines	class VPWidenMemoryInstructionRecipe : public VPRecipeBase {

void setMask(VPValue *Mask) {		void setMask(VPValue *Mask) {
if (!Mask)		if (!Mask)
return;		return;
addOperand(Mask);		addOperand(Mask);
}		}

bool isMasked() const {		bool isMasked() const {
return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;		return isPrefetch() ? getNumOperands() == 5 :
		isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
}		}

public:		public:
VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue Addr, VPValue Mask,		VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue Addr, VPValue Mask,
bool Consecutive, bool Reverse)		bool Consecutive, bool Reverse)
: VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}),		: VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}),
Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) {		Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) {
assert((Consecutive \|\| !Reverse) && "Reverse implies consecutive");		assert((Consecutive \|\| !Reverse) && "Reverse implies consecutive");
new VPValue(this, &Load);		new VPValue(this, &Load);
setMask(Mask);		setMask(Mask);
}		}

VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,		VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
VPValue StoredValue, VPValue Mask,		VPValue StoredValue, VPValue Mask,
bool Consecutive, bool Reverse)		bool Consecutive, bool Reverse)
: VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue}),		: VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue}),
Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {		Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
assert((Consecutive \|\| !Reverse) && "Reverse implies consecutive");		assert((Consecutive \|\| !Reverse) && "Reverse implies consecutive");
setMask(Mask);		setMask(Mask);
}		}

		VPWidenMemoryInstructionRecipe(PrefetchInst &Prefetch, VPValue Addr, VPValue Mask,
		bool Consecutive, bool Reverse)
		: VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}),
		Ingredient(Prefetch), Consecutive(Consecutive), Reverse(Reverse) {
		assert((Consecutive \|\| !Reverse) && "Reverse implies consecutive");
		setMask(Mask);
		}

VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC)		VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC)

/// Return the address accessed by this recipe.		/// Return the address accessed by this recipe.
VPValue *getAddr() const {		VPValue *getAddr() const {
return getOperand(0); // Address is the 1st, mandatory operand.		return getOperand(0); // Address is the 1st, mandatory operand.
}		}

/// Return the mask used by this recipe. Note that a full mask is represented		/// Return the mask used by this recipe. Note that a full mask is represented
/// by a nullptr.		/// by a nullptr.
VPValue *getMask() const {		VPValue *getMask() const {
// Mask is optional and therefore the last operand.		// Mask is optional and therefore the last operand.
return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;		return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
}		}

/// Returns true if this recipe is a store.		/// Returns true if this recipe is a store.
bool isStore() const { return isa<StoreInst>(Ingredient); }		bool isStore() const { return isa<StoreInst>(Ingredient); }

		/// Returns true if this recipe is a prefetch.
		bool isPrefetch() const { return isa<PrefetchInst>(Ingredient); }

/// Return the value stored by this recipe.		/// Return the value stored by this recipe.
VPValue *getStoredValue() const {		VPValue *getStoredValue() const {
assert(isStore() && "Stored value only available for store instructions");		assert(isStore() && "Stored value only available for store instructions");
return getOperand(1); // Stored value is the 2nd, mandatory operand.		return getOperand(1); // Stored value is the 2nd, mandatory operand.
}		}

// Return whether the loaded-from / stored-to addresses are consecutive.		// Return whether the loaded-from / stored-to addresses are consecutive.
bool isConsecutive() const { return Consecutive; }		bool isConsecutive() const { return Consecutive; }
▲ Show 20 Lines • Show All 1,038 Lines • Show Last 20 Lines

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Show First 20 Lines • Show All 129 Lines • ▼ Show 20 Lines	bool VPRecipeBase::mayHaveSideEffects() const {
case VPWidenSelectSC: {		case VPWidenSelectSC: {
const Instruction *I =		const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());		dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;		(void)I;
assert((!I \|\| !I->mayHaveSideEffects()) &&		assert((!I \|\| !I->mayHaveSideEffects()) &&
"underlying instruction has side-effects");		"underlying instruction has side-effects");
return false;		return false;
}		}
case VPWidenMemoryInstructionSC:		case VPWidenMemoryInstructionSC: {
		auto *R = cast<VPWidenMemoryInstructionRecipe>(this);
		if (isa<PrefetchInst>(R->getIngredient()))
		return true;
assert(cast<VPWidenMemoryInstructionRecipe>(this)		assert(cast<VPWidenMemoryInstructionRecipe>(this)
->getIngredient()		->getIngredient()
.mayHaveSideEffects() == mayWriteToMemory() &&		.mayHaveSideEffects() == mayWriteToMemory() &&
"mayHaveSideffects result for ingredient differs from this "		"mayHaveSideffects result for ingredient differs from this "
"implementation");		"implementation");
return mayWriteToMemory();		return mayWriteToMemory();
		}
case VPReplicateSC: {		case VPReplicateSC: {
auto *R = cast<VPReplicateRecipe>(this);		auto *R = cast<VPReplicateRecipe>(this);
return R->getUnderlyingInstr()->mayHaveSideEffects();		return R->getUnderlyingInstr()->mayHaveSideEffects();
}		}
default:		default:
return true;		return true;
}		}
}		}
▲ Show 20 Lines • Show All 923 Lines • ▼ Show 20 Lines	void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
O << " = ";		O << " = ";
printOperands(O, SlotTracker);		printOperands(O, SlotTracker);
}		}

void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,		void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {		VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN ";		O << Indent << "WIDEN ";

if (!isStore()) {		if (!isStore() && !isPrefetch()) {
getVPSingleValue()->printAsOperand(O, SlotTracker);		getVPSingleValue()->printAsOperand(O, SlotTracker);
O << " = ";		O << " = ";
}		}
O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";		O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";

printOperands(O, SlotTracker);		printOperands(O, SlotTracker);
}		}
#endif		#endif
▲ Show 20 Lines • Show All 293 Lines • Show Last 20 Lines