Diff 476159

llvm/include/llvm/Analysis/TargetTransformInfo.h

Show First 20 Lines • Show All 820 Lines • ▼ Show 20 Lines	public:
/// This applies to floating-point math operations and calls, not memory		/// This applies to floating-point math operations and calls, not memory
/// operations, shuffles, or casts.		/// operations, shuffles, or casts.
bool isFPVectorizationPotentiallyUnsafe() const;		bool isFPVectorizationPotentiallyUnsafe() const;

/// Determine if the target supports unaligned memory accesses.		/// Determine if the target supports unaligned memory accesses.
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,		bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
unsigned AddressSpace = 0,		unsigned AddressSpace = 0,
Align Alignment = Align(1),		Align Alignment = Align(1),
bool *Fast = nullptr) const;		unsigned *Fast = nullptr) const;

/// Return hardware support for population count.		/// Return hardware support for population count.
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;		PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

/// Return true if the hardware has a fast square-root instruction.		/// Return true if the hardware has a fast square-root instruction.
bool haveFastSqrt(Type *Ty) const;		bool haveFastSqrt(Type *Ty) const;

/// Return true if the cost of the instruction is too high to speculatively		/// Return true if the cost of the instruction is too high to speculatively
▲ Show 20 Lines • Show All 848 Lines • ▼ Show 20 Lines	public:
enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;		enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
virtual bool enableInterleavedAccessVectorization() = 0;		virtual bool enableInterleavedAccessVectorization() = 0;
virtual bool enableMaskedInterleavedAccessVectorization() = 0;		virtual bool enableMaskedInterleavedAccessVectorization() = 0;
virtual bool isFPVectorizationPotentiallyUnsafe() = 0;		virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,		virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
unsigned BitWidth,		unsigned BitWidth,
unsigned AddressSpace,		unsigned AddressSpace,
Align Alignment,		Align Alignment,
bool *Fast) = 0;		unsigned *Fast) = 0;
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;		virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
virtual bool haveFastSqrt(Type *Ty) = 0;		virtual bool haveFastSqrt(Type *Ty) = 0;
virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) = 0;		virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) = 0;
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;		virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
virtual InstructionCost getFPOpCost(Type *Ty) = 0;		virtual InstructionCost getFPOpCost(Type *Ty) = 0;
virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,		virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
const APInt &Imm, Type *Ty) = 0;		const APInt &Imm, Type *Ty) = 0;
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,		virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
▲ Show 20 Lines • Show All 474 Lines • ▼ Show 20 Lines	public:
bool enableMaskedInterleavedAccessVectorization() override {		bool enableMaskedInterleavedAccessVectorization() override {
return Impl.enableMaskedInterleavedAccessVectorization();		return Impl.enableMaskedInterleavedAccessVectorization();
}		}
bool isFPVectorizationPotentiallyUnsafe() override {		bool isFPVectorizationPotentiallyUnsafe() override {
return Impl.isFPVectorizationPotentiallyUnsafe();		return Impl.isFPVectorizationPotentiallyUnsafe();
}		}
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,		bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
unsigned AddressSpace, Align Alignment,		unsigned AddressSpace, Align Alignment,
bool *Fast) override {		unsigned *Fast) override {
return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,		return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
Alignment, Fast);		Alignment, Fast);
}		}
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {		PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
return Impl.getPopcntSupport(IntTyWidthInBit);		return Impl.getPopcntSupport(IntTyWidthInBit);
}		}
bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }		bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }

▲ Show 20 Lines • Show All 476 Lines • Show Last 20 Lines

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Show First 20 Lines • Show All 360 Lines • ▼ Show 20 Lines	public:
bool enableInterleavedAccessVectorization() const { return false; }		bool enableInterleavedAccessVectorization() const { return false; }

bool enableMaskedInterleavedAccessVectorization() const { return false; }		bool enableMaskedInterleavedAccessVectorization() const { return false; }

bool isFPVectorizationPotentiallyUnsafe() const { return false; }		bool isFPVectorizationPotentiallyUnsafe() const { return false; }

bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,		bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
unsigned AddressSpace, Align Alignment,		unsigned AddressSpace, Align Alignment,
bool *Fast) const {		unsigned *Fast) const {
return false;		return false;
}		}

TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {		TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
return TTI::PSK_Software;		return TTI::PSK_Software;
}		}

bool haveFastSqrt(Type *Ty) const { return false; }		bool haveFastSqrt(Type *Ty) const { return false; }
▲ Show 20 Lines • Show All 915 Lines • Show Last 20 Lines

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Show First 20 Lines • Show All 250 Lines • ▼ Show 20 Lines	protected:

using TargetTransformInfoImplBase::DL;		using TargetTransformInfoImplBase::DL;

public:		public:
/// \name Scalar TTI Implementations		/// \name Scalar TTI Implementations
/// @{		/// @{
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,		bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
unsigned AddressSpace, Align Alignment,		unsigned AddressSpace, Align Alignment,
bool *Fast) const {		unsigned *Fast) const {
EVT E = EVT::getIntegerVT(Context, BitWidth);		EVT E = EVT::getIntegerVT(Context, BitWidth);
return getTLI()->allowsMisalignedMemoryAccesses(		return getTLI()->allowsMisalignedMemoryAccesses(
E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);		E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
}		}

bool hasBranchDivergence() { return false; }		bool hasBranchDivergence() { return false; }

bool useGPUDivergenceAnalysis() { return false; }		bool useGPUDivergenceAnalysis() { return false; }
▲ Show 20 Lines • Show All 2,149 Lines • Show Last 20 Lines

llvm/include/llvm/CodeGen/TargetLowering.h

Show First 20 Lines • Show All 579 Lines • ▼ Show 20 Lines	virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
MVT LoadMVT = LoadVT.getSimpleVT();		MVT LoadMVT = LoadVT.getSimpleVT();

// Don't bother doing this if it's just going to be promoted again later, as		// Don't bother doing this if it's just going to be promoted again later, as
// doing so might interfere with other combines.		// doing so might interfere with other combines.
if (getOperationAction(ISD::LOAD, LoadMVT) == Promote &&		if (getOperationAction(ISD::LOAD, LoadMVT) == Promote &&
getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())		getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
return false;		return false;

bool Fast = false;		unsigned Fast = 0;
return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,		return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,
MMO, &Fast) && Fast;		MMO, &Fast) && Fast;
}		}

/// Return true if the following transform is beneficial:		/// Return true if the following transform is beneficial:
/// (store (y (conv x)), y)) -> (store x, (x))		/// (store (y (conv x)), y)) -> (store x, (x))
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT,		virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT,
const SelectionDAG &DAG,		const SelectionDAG &DAG,
▲ Show 20 Lines • Show All 1,113 Lines • ▼ Show 20 Lines	#include "llvm/IR/ConstrainedOps.def"
unsigned getMaxStoresPerMemmove(bool OptSize) const {		unsigned getMaxStoresPerMemmove(bool OptSize) const {
return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;		return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
}		}

/// Determine if the target supports unaligned memory accesses.		/// Determine if the target supports unaligned memory accesses.
///		///
/// This function returns true if the target allows unaligned memory accesses		/// This function returns true if the target allows unaligned memory accesses
/// of the specified type in the given address space. If true, it also returns		/// of the specified type in the given address space. If true, it also returns
/// whether the unaligned memory access is "fast" in the last argument by		/// a relative speed of the unaligned memory access in the last argument by
/// reference. This is used, for example, in situations where an array		/// reference. The higher the speed number the faster the operation comparing
/// copy/move/set is converted to a sequence of store operations. Its use		/// to a number returned by another such call. This is used, for example, in
/// helps to ensure that such replacements don't generate code that causes an		/// situations where an array copy/move/set is converted to a sequence of
/// alignment error (trap) on the target machine.		/// store operations. Its use helps to ensure that such replacements don't
		/// generate code that causes an alignment error (trap) on the target machine.
virtual bool allowsMisalignedMemoryAccesses(		virtual bool allowsMisalignedMemoryAccesses(
EVT, unsigned AddrSpace = 0, Align Alignment = Align(1),		EVT, unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,		MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool * /*Fast*/ = nullptr) const {		unsigned * /*Fast*/ = nullptr) const {
return false;		return false;
}		}

/// LLT handling variant.		/// LLT handling variant.
virtual bool allowsMisalignedMemoryAccesses(		virtual bool allowsMisalignedMemoryAccesses(
LLT, unsigned AddrSpace = 0, Align Alignment = Align(1),		LLT, unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,		MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool * /*Fast*/ = nullptr) const {		unsigned * /*Fast*/ = nullptr) const {
return false;		return false;
}		}

/// This function returns true if the memory access is aligned or if the		/// This function returns true if the memory access is aligned or if the
/// target allows this specific unaligned memory access. If the access is		/// target allows this specific unaligned memory access. If the access is
/// allowed, the optional final parameter returns if the access is also fast		/// allowed, the optional final parameter returns a relative speed of the
/// (as defined by the target).		/// access (as defined by the target).
bool allowsMemoryAccessForAlignment(		bool allowsMemoryAccessForAlignment(
LLVMContext &Context, const DataLayout &DL, EVT VT,		LLVMContext &Context, const DataLayout &DL, EVT VT,
unsigned AddrSpace = 0, Align Alignment = Align(1),		unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,		MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *Fast = nullptr) const;		unsigned *Fast = nullptr) const;

/// Return true if the memory access of this type is aligned or if the target		/// Return true if the memory access of this type is aligned or if the target
/// allows this specific unaligned access for the given MachineMemOperand.		/// allows this specific unaligned access for the given MachineMemOperand.
/// If the access is allowed, the optional final parameter returns if the		/// If the access is allowed, the optional final parameter returns a relative
/// access is also fast (as defined by the target).		/// speed of the access (as defined by the target).
bool allowsMemoryAccessForAlignment(LLVMContext &Context,		bool allowsMemoryAccessForAlignment(LLVMContext &Context,
const DataLayout &DL, EVT VT,		const DataLayout &DL, EVT VT,
const MachineMemOperand &MMO,		const MachineMemOperand &MMO,
bool *Fast = nullptr) const;		unsigned *Fast = nullptr) const;

/// Return true if the target supports a memory access of this type for the		/// Return true if the target supports a memory access of this type for the
/// given address space and alignment. If the access is allowed, the optional		/// given address space and alignment. If the access is allowed, the optional
/// final parameter returns if the access is also fast (as defined by the		/// final parameter returns the relative speed of the access (as defined by
/// target).		/// the target).
virtual bool		virtual bool
allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,		allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
unsigned AddrSpace = 0, Align Alignment = Align(1),		unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,		MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *Fast = nullptr) const;		unsigned *Fast = nullptr) const;

/// Return true if the target supports a memory access of this type for the		/// Return true if the target supports a memory access of this type for the
/// given MachineMemOperand. If the access is allowed, the optional		/// given MachineMemOperand. If the access is allowed, the optional
/// final parameter returns if the access is also fast (as defined by the		/// final parameter returns the relative access speed (as defined by the
/// target).		/// target).
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,		bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
const MachineMemOperand &MMO,		const MachineMemOperand &MMO,
bool *Fast = nullptr) const;		unsigned *Fast = nullptr) const;

/// LLT handling variant.		/// LLT handling variant.
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty,		bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty,
const MachineMemOperand &MMO,		const MachineMemOperand &MMO,
bool *Fast = nullptr) const;		unsigned *Fast = nullptr) const;

/// Returns the target specific optimal type for load and store operations as		/// Returns the target specific optimal type for load and store operations as
/// a result of memset, memcpy, and memmove lowering.		/// a result of memset, memcpy, and memmove lowering.
/// It returns EVT::Other if the type should be determined using generic		/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.		/// target-independent logic.
virtual EVT		virtual EVT
getOptimalMemOpType(const MemOp &Op,		getOptimalMemOpType(const MemOp &Op,
const AttributeList & /*FuncAttributes*/) const {		const AttributeList & /*FuncAttributes*/) const {
▲ Show 20 Lines • Show All 3,365 Lines • Show Last 20 Lines

llvm/lib/Analysis/TargetTransformInfo.cpp

	Show First 20 Lines • Show All 552 Lines • ▼ Show 20 Lines
	bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {			bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
	return TTIImpl->enableMaskedInterleavedAccessVectorization();			return TTIImpl->enableMaskedInterleavedAccessVectorization();
	}			}

	bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {			bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
	return TTIImpl->isFPVectorizationPotentiallyUnsafe();			return TTIImpl->isFPVectorizationPotentiallyUnsafe();
	}			}

	bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,			bool
				TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
	unsigned BitWidth,			unsigned BitWidth,
	unsigned AddressSpace,			unsigned AddressSpace,
	Align Alignment,			Align Alignment,
	bool *Fast) const {			unsigned *Fast) const {
	return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth,			return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth,
	AddressSpace, Alignment, Fast);			AddressSpace, Alignment, Fast);
	}			}

	TargetTransformInfo::PopcntSupportKind			TargetTransformInfo::PopcntSupportKind
	TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {			TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
	return TTIImpl->getPopcntSupport(IntTyWidthInBit);			return TTIImpl->getPopcntSupport(IntTyWidthInBit);
	}			}
	▲ Show 20 Lines • Show All 657 Lines • Show Last 20 Lines

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Show First 20 Lines • Show All 3,521 Lines • ▼ Show 20 Lines	if (!isLegalOrBeforeLegalizer(
{TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))		{TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
return false;		return false;
auto PtrInfo = MMO.getPointerInfo();		auto PtrInfo = MMO.getPointerInfo();
auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);		auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);

// Load must be allowed and fast on the target.		// Load must be allowed and fast on the target.
LLVMContext &C = MF.getFunction().getContext();		LLVMContext &C = MF.getFunction().getContext();
auto &DL = MF.getDataLayout();		auto &DL = MF.getDataLayout();
bool Fast = false;		unsigned Fast = 0;
if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||		if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
!Fast)		!Fast)
return false;		return false;

MatchInfo = [=](MachineIRBuilder &MIB) {		MatchInfo = [=](MachineIRBuilder &MIB) {
MIB.setInstrAndDebugLoc(*LatestLoad);		MIB.setInstrAndDebugLoc(*LatestLoad);
Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;		Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
MIB.buildLoad(LoadDst, Ptr, *NewMMO);		MIB.buildLoad(LoadDst, Ptr, *NewMMO);
▲ Show 20 Lines • Show All 188 Lines • ▼ Show 20 Lines	bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,

if (FoundStores.size() != NumStoresRequired) {		if (FoundStores.size() != NumStoresRequired) {
return false;		return false;
}		}

const auto &DL = LastStore.getMF()->getDataLayout();		const auto &DL = LastStore.getMF()->getDataLayout();
auto &C = LastStore.getMF()->getFunction().getContext();		auto &C = LastStore.getMF()->getFunction().getContext();
// Check that a store of the wide type is both allowed and fast on the target		// Check that a store of the wide type is both allowed and fast on the target
bool Fast = false;		unsigned Fast = 0;
bool Allowed = getTargetLowering().allowsMemoryAccess(		bool Allowed = getTargetLowering().allowsMemoryAccess(
C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);		C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
if (!Allowed || !Fast)		if (!Allowed || !Fast)
return false;		return false;

// Check if the pieces of the value are going to the expected places in memory		// Check if the pieces of the value are going to the expected places in memory
// to merge the stores.		// to merge the stores.
unsigned NarrowBits = MemTy.getScalarSizeInBits();		unsigned NarrowBits = MemTy.getScalarSizeInBits();
▲ Show 20 Lines • Show All 2,383 Lines • Show Last 20 Lines

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 7,388 Lines • ▼ Show 20 Lines	while (TySize > Size) {
if (NewTy.isVector())		if (NewTy.isVector())
NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);		NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));		NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
unsigned NewTySize = NewTy.getSizeInBytes();		unsigned NewTySize = NewTy.getSizeInBytes();
assert(NewTySize > 0 && "Could not find appropriate type");		assert(NewTySize > 0 && "Could not find appropriate type");

// If the new LLT cannot cover all of the remaining bits, then consider		// If the new LLT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.		// issuing a (or a pair of) unaligned and overlapping load / store.
bool Fast;		unsigned Fast;
// Need to get a VT equivalent for allowMisalignedMemoryAccesses().		// Need to get a VT equivalent for allowMisalignedMemoryAccesses().
MVT VT = getMVTForLLT(Ty);		MVT VT = getMVTForLLT(Ty);
if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&		if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
TLI.allowsMisalignedMemoryAccesses(		TLI.allowsMisalignedMemoryAccesses(
VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),		VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
MachineMemOperand::MONone, &Fast) &&		MachineMemOperand::MONone, &Fast) &&
Fast)		Fast)
TySize = Size;		TySize = Size;
▲ Show 20 Lines • Show All 485 Lines • Show Last 20 Lines

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 8,195 Lines • ▼ Show 20 Lines	for (auto *Store : Stores) {
OffsetMap[Offset] = ByteOffsetFromBase;		OffsetMap[Offset] = ByteOffsetFromBase;
}		}

assert(FirstOffset != INT64_MAX && "First byte offset must be set");		assert(FirstOffset != INT64_MAX && "First byte offset must be set");
assert(FirstStore && "First store must be set");		assert(FirstStore && "First store must be set");

// Check that a store of the wide type is both allowed and fast on the target		// Check that a store of the wide type is both allowed and fast on the target
const DataLayout &Layout = DAG.getDataLayout();		const DataLayout &Layout = DAG.getDataLayout();
bool Fast = false;		unsigned Fast = 0;
bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,		bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
*FirstStore->getMemOperand(), &Fast);		*FirstStore->getMemOperand(), &Fast);
if (!Allowed || !Fast)		if (!Allowed || !Fast)
return SDValue();		return SDValue();

// Check if the pieces of the value are going to the expected places in memory		// Check if the pieces of the value are going to the expected places in memory
// to merge the stores.		// to merge the stores.
auto checkOffsets = [&](bool MatchLittleEndian) {		auto checkOffsets = [&](bool MatchLittleEndian) {
▲ Show 20 Lines • Show All 229 Lines • ▼ Show 20 Lines	SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {

// If we need to bswap and zero extend, we have to insert a shift. Check that		// If we need to bswap and zero extend, we have to insert a shift. Check that
// it is legal.		// it is legal.
if (NeedsBswap && NeedsZext && LegalOperations &&		if (NeedsBswap && NeedsZext && LegalOperations &&
!TLI.isOperationLegal(ISD::SHL, VT))		!TLI.isOperationLegal(ISD::SHL, VT))
return SDValue();		return SDValue();

// Check that a load of the wide type is both allowed and fast on the target		// Check that a load of the wide type is both allowed and fast on the target
bool Fast = false;		unsigned Fast = 0;
bool Allowed =		bool Allowed =
TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,		TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
*FirstLoad->getMemOperand(), &Fast);		*FirstLoad->getMemOperand(), &Fast);
if (!Allowed || !Fast)		if (!Allowed || !Fast)
return SDValue();		return SDValue();

SDValue NewLoad =		SDValue NewLoad =
DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,		DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
▲ Show 20 Lines • Show All 1,490 Lines • ▼ Show 20 Lines	if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
LHS->getAddressSpace() == RHS->getAddressSpace() &&		LHS->getAddressSpace() == RHS->getAddressSpace() &&
(LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&		(LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
ISD::isNON_EXTLoad(LHS)) {		ISD::isNON_EXTLoad(LHS)) {
if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {		if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
SDLoc DL(RHS);		SDLoc DL(RHS);
uint64_t PtrOff =		uint64_t PtrOff =
IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);		IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);		Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
bool Fast = false;		unsigned Fast = 0;
if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,		if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
RHS->getAddressSpace(), NewAlign,		RHS->getAddressSpace(), NewAlign,
RHS->getMemOperand()->getFlags(), &Fast) &&		RHS->getMemOperand()->getFlags(), &Fast) &&
Fast) {		Fast) {
SDValue NewPtr = DAG.getMemBasePlusOffset(		SDValue NewPtr = DAG.getMemBasePlusOffset(
RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);		RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
AddToWorklist(NewPtr.getNode());		AddToWorklist(NewPtr.getNode());
SDValue Load = DAG.getLoad(		SDValue Load = DAG.getLoad(
▲ Show 20 Lines • Show All 3,888 Lines • ▼ Show 20 Lines	SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
if (DAG.getDataLayout().isBigEndian())		if (DAG.getDataLayout().isBigEndian())
std::swap(LD1, LD2);		std::swap(LD1, LD2);

if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||		if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
!LD1->hasOneUse() || !LD2->hasOneUse() ||		!LD1->hasOneUse() || !LD2->hasOneUse() ||
LD1->getAddressSpace() != LD2->getAddressSpace())		LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();		return SDValue();

bool LD1Fast = false;		unsigned LD1Fast = 0;
EVT LD1VT = LD1->getValueType(0);		EVT LD1VT = LD1->getValueType(0);
unsigned LD1Bytes = LD1VT.getStoreSize();		unsigned LD1Bytes = LD1VT.getStoreSize();
if ((!LegalOperations \|\| TLI.isOperationLegal(ISD::LOAD, VT)) &&		if ((!LegalOperations \|\| TLI.isOperationLegal(ISD::LOAD, VT)) &&
DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&		DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,		TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
*LD1->getMemOperand(), &LD1Fast) && LD1Fast)		*LD1->getMemOperand(), &LD1Fast) && LD1Fast)
return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),		return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
LD1->getPointerInfo(), LD1->getAlign());		LD1->getPointerInfo(), LD1->getAlign());
▲ Show 20 Lines • Show All 3,138 Lines • ▼ Show 20 Lines	bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
if (LDMemType.isInteger() && LDType.isInteger()) {		if (LDMemType.isInteger() && LDType.isInteger()) {
switch (LD->getExtensionType()) {		switch (LD->getExtensionType()) {
case ISD::NON_EXTLOAD:		case ISD::NON_EXTLOAD:
Val = DAG.getBitcast(LDType, Val);		Val = DAG.getBitcast(LDType, Val);
return true;		return true;
case ISD::EXTLOAD:		case ISD::EXTLOAD:
Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);		Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
return true;		return true;
case ISD::SEXTLOAD:		case ISD::SEXTLOAD:
		kosarevUnsubmitted Not Done Reply Inline Actions Shouldn't we also rename them all to something like `Fastness`? kosarev: Shouldn't we also rename them all to something like `Fastness`?
		rampitecAuthorUnsubmitted Done Reply Inline Actions I have a change like this. It is 4 times bigger, so I dropped it. We can rename it if this is reasonable, but probably as a separate NFC. In any way it is almost not reviewable. rampitec: I have a change like this. It is 4 times bigger, so I dropped it. We can rename it if this is…
		kosarevUnsubmitted Not Done Reply Inline Actions OK, makes sense. kosarev: OK, makes sense.
Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);		Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
return true;		return true;
case ISD::ZEXTLOAD:		case ISD::ZEXTLOAD:
Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);		Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
return true;		return true;
}		}
}		}
return false;		return false;
▲ Show 20 Lines • Show All 544 Lines • ▼ Show 20 Lines	bool canMergeExpensiveCrossRegisterBankCopy() const {
const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();		const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
// Assume bitcasts are cheap, unless both register classes do not		// Assume bitcasts are cheap, unless both register classes do not
// explicitly share a common sub class.		// explicitly share a common sub class.
if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))		if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
return false;		return false;

// Check if it will be merged with the load.		// Check if it will be merged with the load.
// 1. Check the alignment / fast memory access constraint.		// 1. Check the alignment / fast memory access constraint.
bool IsFast = false;		unsigned IsFast = 0;
if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,		if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
Origin->getAddressSpace(), getAlign(),		Origin->getAddressSpace(), getAlign(),
Origin->getMemOperand()->getFlags(), &IsFast) ||		Origin->getMemOperand()->getFlags(), &IsFast) ||
!IsFast)		!IsFast)
return false;		return false;

// 2. Check that the load is a legal operation for that type.		// 2. Check that the load is a legal operation for that type.
if (!TLI.isOperationLegal(ISD::LOAD, ResVT))		if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
▲ Show 20 Lines • Show All 486 Lines • ▼ Show 20 Lines	if ((Imm & Mask) == Imm) {
if (Opc == ISD::AND)		if (Opc == ISD::AND)
NewImm ^= APInt::getAllOnes(NewBW);		NewImm ^= APInt::getAllOnes(NewBW);
uint64_t PtrOff = ShAmt / 8;		uint64_t PtrOff = ShAmt / 8;
// For big endian targets, we need to adjust the offset to the pointer to		// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.		// load the correct bytes.
if (DAG.getDataLayout().isBigEndian())		if (DAG.getDataLayout().isBigEndian())
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;		PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

bool IsFast = false;		unsigned IsFast = 0;
Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);		Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,		if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
LD->getAddressSpace(), NewAlign,		LD->getAddressSpace(), NewAlign,
LD->getMemOperand()->getFlags(), &IsFast) ||		LD->getMemOperand()->getFlags(), &IsFast) ||
!IsFast)		!IsFast)
return SDValue();		return SDValue();

SDValue NewPtr =		SDValue NewPtr =
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines	if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&

TypeSize VTSize = VT.getSizeInBits();		TypeSize VTSize = VT.getSizeInBits();

// We don't know the size of scalable types at compile time so we cannot		// We don't know the size of scalable types at compile time so we cannot
// create an integer of the equivalent size.		// create an integer of the equivalent size.
if (VTSize.isScalable())		if (VTSize.isScalable())
return SDValue();		return SDValue();

bool FastLD = false, FastST = false;		unsigned FastLD = 0, FastST = 0;
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());		EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
if (!TLI.isOperationLegal(ISD::LOAD, IntVT) \|\|		if (!TLI.isOperationLegal(ISD::LOAD, IntVT) \|\|
!TLI.isOperationLegal(ISD::STORE, IntVT) \|\|		!TLI.isOperationLegal(ISD::STORE, IntVT) \|\|
!TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) \|\|		!TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) \|\|
!TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) \|\|		!TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) \|\|
!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,		!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
*LD->getMemOperand(), &FastLD) \|\|		*LD->getMemOperand(), &FastLD) \|\|
!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,		!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
▲ Show 20 Lines • Show All 595 Lines • ▼ Show 20 Lines	for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)		if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
FirstZeroAfterNonZero = i;		FirstZeroAfterNonZero = i;
}		}
NonZero \|= !IsElementZero;		NonZero \|= !IsElementZero;

// Find a legal type for the constant store.		// Find a legal type for the constant store.
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;		unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);		EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
bool IsFast = false;		unsigned IsFast = 0;

// Break early when size is too large to be legal.		// Break early when size is too large to be legal.
if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)		if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
break;		break;

if (TLI.isTypeLegal(StoreTy) &&		if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy,		TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
DAG.getMachineFunction()) &&		DAG.getMachineFunction()) &&
▲ Show 20 Lines • Show All 93 Lines • ▼ Show 20 Lines	while (NumConsecutiveStores >= 2) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;		LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();		unsigned FirstStoreAS = FirstInChain->getAddressSpace();
Align FirstStoreAlign = FirstInChain->getAlign();		Align FirstStoreAlign = FirstInChain->getAlign();
unsigned NumStoresToMerge = 1;		unsigned NumStoresToMerge = 1;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {		for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
// Find a legal type for the vector store.		// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;		unsigned Elts = (i + 1) * NumMemElts;
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);		EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
bool IsFast = false;		unsigned IsFast = 0;

// Break early when size is too large to be legal.		// Break early when size is too large to be legal.
if (Ty.getSizeInBits() > MaximumLegalStoreInBits)		if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
break;		break;

if (TLI.isTypeLegal(Ty) &&		if (TLI.isTypeLegal(Ty) &&
TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&		TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, Ty,		TLI.allowsMemoryAccess(Context, DL, Ty,
▲ Show 20 Lines • Show All 136 Lines • ▼ Show 20 Lines	for (unsigned i = 1; i < LoadNodes.size(); ++i) {
// Find a legal type for the vector store.		// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;		unsigned Elts = (i + 1) * NumMemElts;
EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);		EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);

// Break early when size is too large to be legal.		// Break early when size is too large to be legal.
if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)		if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
break;		break;

bool IsFastSt = false;		unsigned IsFastSt = 0;
bool IsFastLd = false;		unsigned IsFastLd = 0;
// Don't try vector types if we need a rotate. We may still fail the		// Don't try vector types if we need a rotate. We may still fail the
// legality checks for the integer type, but we can't handle the rotate		// legality checks for the integer type, but we can't handle the rotate
// case with vectors.		// case with vectors.
// FIXME: We could use a shuffle in place of the rotate.		// FIXME: We could use a shuffle in place of the rotate.
if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&		if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy,		TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
DAG.getMachineFunction()) &&		DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,		TLI.allowsMemoryAccess(Context, DL, StoreTy,
▲ Show 20 Lines • Show All 1,068 Lines • ▼ Show 20 Lines	if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
Alignment = commonAlignment(Alignment, PtrOff);		Alignment = commonAlignment(Alignment, PtrOff);
} else {		} else {
// Discard the pointer info except the address space because the memory		// Discard the pointer info except the address space because the memory
// operand can't represent this new access since the offset is variable.		// operand can't represent this new access since the offset is variable.
MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());		MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);		Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
}		}

bool IsFast = false;		unsigned IsFast = 0;
if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,		if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
OriginalLoad->getAddressSpace(), Alignment,		OriginalLoad->getAddressSpace(), Alignment,
OriginalLoad->getMemOperand()->getFlags(),		OriginalLoad->getMemOperand()->getFlags(),
&IsFast) \|\|		&IsFast) \|\|
!IsFast)		!IsFast)
return SDValue();		return SDValue();

SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),		SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
▲ Show 20 Lines • Show All 5,372 Lines • Show Last 20 Lines

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 255 Lines • ▼ Show 20 Lines	while (VTSize > Size) {
if (NewVT == MVT::i8)		if (NewVT == MVT::i8)
break;		break;
} while (!isSafeMemOpType(NewVT.getSimpleVT()));		} while (!isSafeMemOpType(NewVT.getSimpleVT()));
}		}
NewVTSize = NewVT.getSizeInBits() / 8;		NewVTSize = NewVT.getSizeInBits() / 8;

// If the new VT cannot cover all of the remaining bits, then consider		// If the new VT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.		// issuing a (or a pair of) unaligned and overlapping load / store.
bool Fast;		unsigned Fast;
if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&		if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
allowsMisalignedMemoryAccesses(		allowsMisalignedMemoryAccesses(
VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),		VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
MachineMemOperand::MONone, &Fast) &&		MachineMemOperand::MONone, &Fast) &&
Fast)		Fast)
VTSize = Size;		VTSize = Size;
else {		else {
VT = NewVT;		VT = NewVT;
▲ Show 20 Lines • Show All 9,919 Lines • Show Last 20 Lines

llvm/lib/CodeGen/TargetLoweringBase.cpp

	Show First 20 Lines • Show All 1,710 Lines • ▼ Show 20 Lines
	/// alignment, not its logarithm.			/// alignment, not its logarithm.
	uint64_t TargetLoweringBase::getByValTypeAlignment(Type *Ty,			uint64_t TargetLoweringBase::getByValTypeAlignment(Type *Ty,
	const DataLayout &DL) const {			const DataLayout &DL) const {
	// The result is the data layout's ABI alignment for Ty, unwrapped from
	// Align into a plain integer via value().
	return DL.getABITypeAlign(Ty).value();			return DL.getABITypeAlign(Ty).value();
	}			}

	// Decide whether a memory access of type VT in address space AddrSpace with
	// the given Alignment and Flags is allowed, judged on alignment alone.
	//
	// If VT is zero-sized, or Alignment is at least the data layout's ABI
	// alignment for VT, the access is accepted and, when Fast is non-null, *Fast
	// is set to report it as fast. Otherwise the access is treated as misaligned
	// and the answer (including *Fast) comes from
	// allowsMisalignedMemoryAccesses().
	//
	// Fast is an optional out-parameter; passing nullptr is handled.
	bool TargetLoweringBase::allowsMemoryAccessForAlignment(			bool TargetLoweringBase::allowsMemoryAccessForAlignment(
	LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,			LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
	Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {			Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const {
	// Check if the specified alignment is sufficient based on the data layout.			// Check if the specified alignment is sufficient based on the data layout.
	// TODO: While using the data layout works in practice, a better solution			// TODO: While using the data layout works in practice, a better solution
	// would be to implement this check directly (make this a virtual function).			// would be to implement this check directly (make this a virtual function).
	// For example, the ABI alignment may change based on software platform while			// For example, the ABI alignment may change based on software platform while
	// this function should only be affected by hardware implementation.			// this function should only be affected by hardware implementation.
	Type *Ty = VT.getTypeForEVT(Context);			Type *Ty = VT.getTypeForEVT(Context);
	if (VT.isZeroSized() \|\| Alignment >= DL.getABITypeAlign(Ty)) {			if (VT.isZeroSized() \|\| Alignment >= DL.getABITypeAlign(Ty)) {
	// Assume that an access that meets the ABI-specified alignment is fast.			// Assume that an access that meets the ABI-specified alignment is fast.
	if (Fast != nullptr)			if (Fast != nullptr)
	*Fast = true;			*Fast = 1;
	return true;			return true;
	}			}

	// This is a misaligned access.			// This is a misaligned access.
	return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast);			return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast);
	}			}

	// Convenience overload taking a MachineMemOperand: the address space,
	// alignment and flags are read from MMO and forwarded, together with Fast, to
	// the overload that takes those values explicitly.
	bool TargetLoweringBase::allowsMemoryAccessForAlignment(			bool TargetLoweringBase::allowsMemoryAccessForAlignment(
	LLVMContext &Context, const DataLayout &DL, EVT VT,			LLVMContext &Context, const DataLayout &DL, EVT VT,
	const MachineMemOperand &MMO, bool *Fast) const {			const MachineMemOperand &MMO, unsigned *Fast) const {
	return allowsMemoryAccessForAlignment(Context, DL, VT, MMO.getAddrSpace(),			return allowsMemoryAccessForAlignment(Context, DL, VT, MMO.getAddrSpace(),
	MMO.getAlign(), MMO.getFlags(), Fast);			MMO.getAlign(), MMO.getFlags(), Fast);
	}			}

	// Base implementation of the memory-access query: it performs no checks of
	// its own and forwards every argument unchanged to
	// allowsMemoryAccessForAlignment().
	bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,			bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
	const DataLayout &DL, EVT VT,			const DataLayout &DL, EVT VT,
	unsigned AddrSpace, Align Alignment,			unsigned AddrSpace, Align Alignment,
	MachineMemOperand::Flags Flags,			MachineMemOperand::Flags Flags,
	bool *Fast) const {			unsigned *Fast) const {
	return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment,			return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment,
	Flags, Fast);			Flags, Fast);
	}			}

	// MachineMemOperand overload for an EVT: reads the address space, alignment
	// and flags from MMO and forwards them, with Fast, to the overload that takes
	// those values explicitly.
	bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,			bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
	const DataLayout &DL, EVT VT,			const DataLayout &DL, EVT VT,
	const MachineMemOperand &MMO,			const MachineMemOperand &MMO,
	bool *Fast) const {			unsigned *Fast) const {
	return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),			return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
	MMO.getFlags(), Fast);			MMO.getFlags(), Fast);
	}			}

	// LLT overload: the low-level type is first mapped to an EVT with
	// getApproximateEVTForLLT(), then the query is answered by the EVT overload
	// using the address space, alignment and flags taken from MMO.
	bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,			bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
	const DataLayout &DL, LLT Ty,			const DataLayout &DL, LLT Ty,
	const MachineMemOperand &MMO,			const MachineMemOperand &MMO,
	bool *Fast) const {			unsigned *Fast) const {
	EVT VT = getApproximateEVTForLLT(Ty, DL, Context);			EVT VT = getApproximateEVTForLLT(Ty, DL, Context);
	return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),			return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
	MMO.getFlags(), Fast);			MMO.getFlags(), Fast);
	}			}

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// TargetTransformInfo Helpers			// TargetTransformInfo Helpers
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	▲ Show 20 Lines • Show All 557 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Show First 20 Lines • Show All 531 Lines • ▼ Show 20 Lines	public:

MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;		MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

/// Returns true if the target allows unaligned memory accesses of the		/// Returns true if the target allows unaligned memory accesses of the
/// specified type.		/// specified type.
bool allowsMisalignedMemoryAccesses(		bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),		EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,		MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *Fast = nullptr) const override;		unsigned *Fast = nullptr) const override;
/// LLT variant.		/// LLT variant.
bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,		bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
Align Alignment,		Align Alignment,
MachineMemOperand::Flags Flags,		MachineMemOperand::Flags Flags,
bool *Fast = nullptr) const override;		unsigned *Fast = nullptr) const override;

/// Provide custom lowering hooks for some operations.		/// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;		SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

const char *getTargetNodeName(unsigned Opcode) const override;		const char *getTargetNodeName(unsigned Opcode) const override;

SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;		SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

▲ Show 20 Lines • Show All 661 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 2,051 Lines • ▼ Show 20 Lines

// Scalar shift amounts are always reported as MVT::i64; neither the data
// layout nor the (unnamed) EVT argument influences the result.
MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,		MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
EVT) const {		EVT) const {
return MVT::i64;		return MVT::i64;
}		}

// EVT variant of the misaligned-access query.
//
// Returns false when the subtarget requires strict alignment (in which case
// *Fast is left untouched); otherwise misaligned accesses are allowed and the
// function returns true. When Fast is non-null it receives the result of the
// boolean condition below: the access counts as fast unless it is a 16-byte
// store-size access on a subtarget where misaligned 128-bit stores are slow,
// and neither of the two listed exceptions (Alignment <= 2, VT == v2i64)
// applies. AddrSpace and Flags are not consulted here.
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(		bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,		EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {		unsigned *Fast) const {
if (Subtarget->requiresStrictAlign())		if (Subtarget->requiresStrictAlign())
return false;		return false;

if (Fast) {		if (Fast) {
// Some CPUs are fine with unaligned stores except for 128-bit ones.		// Some CPUs are fine with unaligned stores except for 128-bit ones.
*Fast = !Subtarget->isMisaligned128StoreSlow() \|\| VT.getStoreSize() != 16 \|\|		*Fast = !Subtarget->isMisaligned128StoreSlow() \|\| VT.getStoreSize() != 16 \|\|
// See comments in performSTORECombine() for more details about		// See comments in performSTORECombine() for more details about
// these conditions.		// these conditions.

// Code that uses clang vector extensions can mark that it		// Code that uses clang vector extensions can mark that it
// wants unaligned accesses to be treated as fast by		// wants unaligned accesses to be treated as fast by
// underspecifying alignment to be 1 or 2.		// underspecifying alignment to be 1 or 2.
Alignment <= 2 \|\|		Alignment <= 2 \|\|

// Disregard v2i64. Memcpy lowering produces those and splitting		// Disregard v2i64. Memcpy lowering produces those and splitting
// them regresses performance on micro-benchmarks and olden/bh.		// them regresses performance on micro-benchmarks and olden/bh.
VT == MVT::v2i64;		VT == MVT::v2i64;
}		}
return true;		return true;
}		}

// Same as above but handling LLTs instead.		// Same as above but handling LLTs instead.
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(		bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,		LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {		unsigned *Fast) const {
if (Subtarget->requiresStrictAlign())		if (Subtarget->requiresStrictAlign())
return false;		return false;

if (Fast) {		if (Fast) {
// Some CPUs are fine with unaligned stores except for 128-bit ones.		// Some CPUs are fine with unaligned stores except for 128-bit ones.
*Fast = !Subtarget->isMisaligned128StoreSlow() \|\|		*Fast = !Subtarget->isMisaligned128StoreSlow() \|\|
Ty.getSizeInBytes() != 16 \|\|		Ty.getSizeInBytes() != 16 \|\|
// See comments in performSTORECombine() for more details about		// See comments in performSTORECombine() for more details about
▲ Show 20 Lines • Show All 12,230 Lines • ▼ Show 20 Lines	EVT AArch64TargetLowering::getOptimalMemOpType(
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;		bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
// Only use AdvSIMD to implement memset of 32-byte and above. It would have		// Only use AdvSIMD to implement memset of 32-byte and above. It would have
// taken one instruction to materialize the v2i64 zero and one store (with		// taken one instruction to materialize the v2i64 zero and one store (with
// restrictive addressing mode). Just do i64 stores.		// restrictive addressing mode). Just do i64 stores.
bool IsSmallMemset = Op.isMemset() && Op.size() < 32;		bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {		auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
if (Op.isAligned(AlignCheck))		if (Op.isAligned(AlignCheck))
return true;		return true;
bool Fast;		unsigned Fast;
return allowsMisalignedMemoryAccesses(VT, 0, Align(1),		return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
MachineMemOperand::MONone, &Fast) &&		MachineMemOperand::MONone, &Fast) &&
Fast;		Fast;
};		};

if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&		if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
AlignmentIsAcceptable(MVT::v16i8, Align(16)))		AlignmentIsAcceptable(MVT::v16i8, Align(16)))
return MVT::v16i8;		return MVT::v16i8;
Show All 13 Lines	LLT AArch64TargetLowering::getOptimalMemOpLLT(
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;		bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
// Only use AdvSIMD to implement memset of 32-byte and above. It would have		// Only use AdvSIMD to implement memset of 32-byte and above. It would have
// taken one instruction to materialize the v2i64 zero and one store (with		// taken one instruction to materialize the v2i64 zero and one store (with
// restrictive addressing mode). Just do i64 stores.		// restrictive addressing mode). Just do i64 stores.
bool IsSmallMemset = Op.isMemset() && Op.size() < 32;		bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {		auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
if (Op.isAligned(AlignCheck))		if (Op.isAligned(AlignCheck))
return true;		return true;
bool Fast;		unsigned Fast;
return allowsMisalignedMemoryAccesses(VT, 0, Align(1),		return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
MachineMemOperand::MONone, &Fast) &&		MachineMemOperand::MONone, &Fast) &&
Fast;		Fast;
};		};

if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&		if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
AlignmentIsAcceptable(MVT::v2i64, Align(16)))		AlignmentIsAcceptable(MVT::v2i64, Align(16)))
return LLT::fixed_vector(2, 64);		return LLT::fixed_vector(2, 64);
▲ Show 20 Lines • Show All 9,012 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Show First 20 Lines • Show All 677 Lines • ▼ Show 20 Lines	if (LoadTy.getScalarType() == MVT::i32)
return false;		return false;

unsigned LScalarSize = LoadTy.getScalarSizeInBits();		unsigned LScalarSize = LoadTy.getScalarSizeInBits();
unsigned CastScalarSize = CastTy.getScalarSizeInBits();		unsigned CastScalarSize = CastTy.getScalarSizeInBits();

if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32))		if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32))
return false;		return false;

bool Fast = false;		unsigned Fast = 0;
return allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),		return allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
CastTy, MMO, &Fast) &&		CastTy, MMO, &Fast) &&
Fast;		Fast;
}		}

// SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also		// SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also
// profitable with the expansion for 64-bit since it's generally good to		// profitable with the expansion for 64-bit since it's generally good to
// speculate things.		// speculate things.
▲ Show 20 Lines • Show All 2,203 Lines • ▼ Show 20 Lines	SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,

SDLoc SL(N);		SDLoc SL(N);
SelectionDAG &DAG = DCI.DAG;		SelectionDAG &DAG = DCI.DAG;
EVT VT = LN->getMemoryVT();		EVT VT = LN->getMemoryVT();

unsigned Size = VT.getStoreSize();		unsigned Size = VT.getStoreSize();
Align Alignment = LN->getAlign();		Align Alignment = LN->getAlign();
if (Alignment < Size && isTypeLegal(VT)) {		if (Alignment < Size && isTypeLegal(VT)) {
bool IsFast;		unsigned IsFast;
unsigned AS = LN->getAddressSpace();		unsigned AS = LN->getAddressSpace();

// Expand unaligned loads earlier than legalization. Due to visitation order		// Expand unaligned loads earlier than legalization. Due to visitation order
// problems during legalization, the emitted instructions to pack and unpack		// problems during legalization, the emitted instructions to pack and unpack
// the bytes again are not eliminated in the case of an unaligned copy.		// the bytes again are not eliminated in the case of an unaligned copy.
if (!allowsMisalignedMemoryAccesses(		if (!allowsMisalignedMemoryAccesses(
VT, AS, Alignment, LN->getMemOperand()->getFlags(), &IsFast)) {		VT, AS, Alignment, LN->getMemOperand()->getFlags(), &IsFast)) {
if (VT.isVector())		if (VT.isVector())
Show All 36 Lines	SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,

EVT VT = SN->getMemoryVT();		EVT VT = SN->getMemoryVT();
unsigned Size = VT.getStoreSize();		unsigned Size = VT.getStoreSize();

SDLoc SL(N);		SDLoc SL(N);
SelectionDAG &DAG = DCI.DAG;		SelectionDAG &DAG = DCI.DAG;
Align Alignment = SN->getAlign();		Align Alignment = SN->getAlign();
if (Alignment < Size && isTypeLegal(VT)) {		if (Alignment < Size && isTypeLegal(VT)) {
bool IsFast;		unsigned IsFast;
unsigned AS = SN->getAddressSpace();		unsigned AS = SN->getAddressSpace();

// Expand unaligned stores earlier than legalization. Due to visitation		// Expand unaligned stores earlier than legalization. Due to visitation
// order problems during legalization, the emitted instructions to pack and		// order problems during legalization, the emitted instructions to pack and
// unpack the bytes again are not eliminated in the case of an unaligned		// unpack the bytes again are not eliminated in the case of an unaligned
// copy.		// copy.
if (!allowsMisalignedMemoryAccesses(		if (!allowsMisalignedMemoryAccesses(
VT, AS, Alignment, SN->getMemOperand()->getFlags(), &IsFast)) {		VT, AS, Alignment, SN->getMemOperand()->getFlags(), &IsFast)) {
▲ Show 20 Lines • Show All 1,907 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Show First 20 Lines • Show All 400 Lines • ▼ Show 20 Lines	static bool shouldWidenLoad(const GCNSubtarget &ST, LLT MemoryTy,
//		//
// TODO: Could check dereferenceable for less aligned cases.		// TODO: Could check dereferenceable for less aligned cases.
unsigned RoundedSize = NextPowerOf2(SizeInBits);		unsigned RoundedSize = NextPowerOf2(SizeInBits);
if (AlignInBits < RoundedSize)		if (AlignInBits < RoundedSize)
return false;		return false;

// Do not widen if it would introduce a slow unaligned load.		// Do not widen if it would introduce a slow unaligned load.
const SITargetLowering *TLI = ST.getTargetLowering();		const SITargetLowering *TLI = ST.getTargetLowering();
bool Fast = false;		unsigned Fast = 0;
return TLI->allowsMisalignedMemoryAccessesImpl(		return TLI->allowsMisalignedMemoryAccessesImpl(
RoundedSize, AddrSpace, Align(AlignInBits / 8),		RoundedSize, AddrSpace, Align(AlignInBits / 8),
MachineMemOperand::MOLoad, &Fast) &&		MachineMemOperand::MOLoad, &Fast) &&
Fast;		Fast;
}		}

static bool shouldWidenLoad(const GCNSubtarget &ST, const LegalityQuery &Query,		static bool shouldWidenLoad(const GCNSubtarget &ST, const LegalityQuery &Query,
unsigned Opcode) {		unsigned Opcode) {
▲ Show 20 Lines • Show All 5,359 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/R600ISelLowering.h

Show First 20 Lines • Show All 46 Lines • ▼ Show 20 Lines	EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
EVT VT) const override;		EVT VT) const override;

bool canMergeStoresTo(unsigned AS, EVT MemVT,		bool canMergeStoresTo(unsigned AS, EVT MemVT,
const MachineFunction &MF) const override;		const MachineFunction &MF) const override;

bool allowsMisalignedMemoryAccesses(		bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AS, Align Alignment,		EVT VT, unsigned AS, Align Alignment,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,		MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *IsFast = nullptr) const override;		unsigned *IsFast = nullptr) const override;

bool canCombineTruncStore(EVT ValVT, EVT MemVT,		bool canCombineTruncStore(EVT ValVT, EVT MemVT,
bool LegalOperations) const override {		bool LegalOperations) const override {
// R600 has "custom" lowering for truncating stores despite not supporting		// R600 has "custom" lowering for truncating stores despite not supporting
// those instructions. If we allow that custom lowering in the DAG combiner		// those instructions. If we allow that custom lowering in the DAG combiner
// then all truncates are merged into truncating stores, giving worse code		// then all truncates are merged into truncating stores, giving worse code
// generation. This hook prevents the DAG combiner performing that combine.		// generation. This hook prevents the DAG combiner performing that combine.
return isTruncStoreLegal(ValVT, MemVT);		return isTruncStoreLegal(ValVT, MemVT);
▲ Show 20 Lines • Show All 57 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/R600ISelLowering.cpp

Show First 20 Lines • Show All 1,515 Lines • ▼ Show 20 Lines	bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
if ((AS == AMDGPUAS::LOCAL_ADDRESS \|\| AS == AMDGPUAS::PRIVATE_ADDRESS)) {		if ((AS == AMDGPUAS::LOCAL_ADDRESS \|\| AS == AMDGPUAS::PRIVATE_ADDRESS)) {
return (MemVT.getSizeInBits() <= 32);		return (MemVT.getSizeInBits() <= 32);
}		}
return true;		return true;
}		}

// Misaligned-access query for R600.
//
// *IsFast (when provided) starts out as "not fast". Non-simple value types,
// MVT::Other, and types narrower than i32 are rejected outright. For anything
// else *IsFast is switched to "fast" and the access is allowed only if VT is
// wider than i32 and Alignment is at least 4 bytes.
//
// Note that *IsFast is set to "fast" before the final check, so it can be set
// even when the function returns false (e.g. for VT == i32). AddrSpace and
// Flags are not consulted here.
bool R600TargetLowering::allowsMisalignedMemoryAccesses(		bool R600TargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,		EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *IsFast) const {		unsigned *IsFast) const {
if (IsFast)		if (IsFast)
*IsFast = false;		*IsFast = 0;

if (!VT.isSimple() \|\| VT == MVT::Other)		if (!VT.isSimple() \|\| VT == MVT::Other)
return false;		return false;

if (VT.bitsLT(MVT::i32))		if (VT.bitsLT(MVT::i32))
return false;		return false;

// TODO: This is a rough estimate.		// TODO: This is a rough estimate.
if (IsFast)		if (IsFast)
*IsFast = true;		*IsFast = 1;

return VT.bitsGT(MVT::i32) && Alignment >= Align(4);		return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
}		}

static SDValue CompactSwizzlableVector(		static SDValue CompactSwizzlableVector(
SelectionDAG &DAG, SDValue VectorEntry,		SelectionDAG &DAG, SDValue VectorEntry,
DenseMap<unsigned, unsigned> &RemapSwizzle) {		DenseMap<unsigned, unsigned> &RemapSwizzle) {
assert(RemapSwizzle.empty());		assert(RemapSwizzle.empty());
▲ Show 20 Lines • Show All 616 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIISelLowering.h

Show First 20 Lines • Show All 285 Lines • ▼ Show 20 Lines	bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
Instruction *I = nullptr) const override;		Instruction *I = nullptr) const override;

bool canMergeStoresTo(unsigned AS, EVT MemVT,		bool canMergeStoresTo(unsigned AS, EVT MemVT,
const MachineFunction &MF) const override;		const MachineFunction &MF) const override;

bool allowsMisalignedMemoryAccessesImpl(		bool allowsMisalignedMemoryAccessesImpl(
unsigned Size, unsigned AddrSpace, Align Alignment,		unsigned Size, unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,		MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *IsFast = nullptr) const;		unsigned *IsFast = nullptr) const;

/// LLT variant of the misaligned-access query. Resets *IsFast (when a
/// pointer is supplied) to "not fast" and then delegates to
/// allowsMisalignedMemoryAccessesImpl(), passing the type's size in bits
/// along with the address space, alignment, flags and IsFast.
bool allowsMisalignedMemoryAccesses(		bool allowsMisalignedMemoryAccesses(
LLT Ty, unsigned AddrSpace, Align Alignment,		LLT Ty, unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,		MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *IsFast = nullptr) const override {		unsigned *IsFast = nullptr) const override {
if (IsFast)		if (IsFast)
*IsFast = false;		*IsFast = 0;
return allowsMisalignedMemoryAccessesImpl(Ty.getSizeInBits(), AddrSpace,		return allowsMisalignedMemoryAccessesImpl(Ty.getSizeInBits(), AddrSpace,
Alignment, Flags, IsFast);		Alignment, Flags, IsFast);
}		}

bool allowsMisalignedMemoryAccesses(		bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AS, Align Alignment,		EVT VT, unsigned AS, Align Alignment,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,		MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *IsFast = nullptr) const override;		unsigned *IsFast = nullptr) const override;

EVT getOptimalMemOpType(const MemOp &Op,		EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override;		const AttributeList &FuncAttributes) const override;

bool isMemOpUniform(const SDNode *N) const;		bool isMemOpUniform(const SDNode *N) const;
bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;		bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;

static bool isNonGlobalAddrSpace(unsigned AS);		static bool isNonGlobalAddrSpace(unsigned AS);
▲ Show 20 Lines • Show All 223 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,380 Lines • ▼ Show 20 Lines	bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
} else if (AS == AMDGPUAS::LOCAL_ADDRESS \|\| AS == AMDGPUAS::REGION_ADDRESS) {		} else if (AS == AMDGPUAS::LOCAL_ADDRESS \|\| AS == AMDGPUAS::REGION_ADDRESS) {
return (MemVT.getSizeInBits() <= 2 * 32);		return (MemVT.getSizeInBits() <= 2 * 32);
}		}
return true;		return true;
}		}

bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(		bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
unsigned Size, unsigned AddrSpace, Align Alignment,		unsigned Size, unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags, bool *IsFast) const {		MachineMemOperand::Flags Flags, unsigned *IsFast) const {
if (IsFast)		if (IsFast)
*IsFast = false;		*IsFast = 0;

if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS \|\|		if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS \|\|
AddrSpace == AMDGPUAS::REGION_ADDRESS) {		AddrSpace == AMDGPUAS::REGION_ADDRESS) {
// Check if alignment requirements for ds_read/write instructions are		// Check if alignment requirements for ds_read/write instructions are
// disabled.		// disabled.
if (!Subtarget->hasUnalignedDSAccessEnabled() && Alignment < Align(4))		if (!Subtarget->hasUnalignedDSAccessEnabled() && Alignment < Align(4))
return false;		return false;

Show All 22 Lines	case 64:
// ds_read2/write2_b32 with adjacent offsets.		// ds_read2/write2_b32 with adjacent offsets.
RequiredAlignment = Align(4);		RequiredAlignment = Align(4);

if (Subtarget->hasUnalignedDSAccessEnabled()) {		if (Subtarget->hasUnalignedDSAccessEnabled()) {
// We will either select ds_read_b64/ds_write_b64 or ds_read2_b32/		// We will either select ds_read_b64/ds_write_b64 or ds_read2_b32/
// ds_write2_b32 depending on the alignment. In either case with either		// ds_write2_b32 depending on the alignment. In either case with either
// alignment there is no faster way of doing this.		// alignment there is no faster way of doing this.
if (IsFast)		if (IsFast)
*IsFast = true;		*IsFast = 1;
return true;		return true;
}		}

break;		break;
case 96:		case 96:
if (!Subtarget->hasDS96AndDS128())		if (!Subtarget->hasDS96AndDS128())
return false;		return false;

Show All 23 Lines	case 128:

if (Subtarget->hasUnalignedDSAccessEnabled()) {		if (Subtarget->hasUnalignedDSAccessEnabled()) {
// Naturally aligned access is fastest. However, also report it is Fast		// Naturally aligned access is fastest. However, also report it is Fast
// if memory is aligned less than DWORD. A narrow load or store will be		// if memory is aligned less than DWORD. A narrow load or store will be
// equally slow as a single ds_read_b128/ds_write_b128, but there		// equally slow as a single ds_read_b128/ds_write_b128, but there
// will be more of them, so overall we will pay less penalty issuing a		// will be more of them, so overall we will pay less penalty issuing a
// single instruction.		// single instruction.
if (IsFast)		if (IsFast)
*IsFast = Alignment >= RequiredAlignment \|\| Alignment < Align(4);		*IsFast= Alignment >= RequiredAlignment \|\| Alignment < Align(4);
return true;		return true;
}		}

break;		break;
default:		default:
if (Size > 32)		if (Size > 32)
return false;		return false;

▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines	bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
// Smaller than dword value must be aligned.		// Smaller than dword value must be aligned.
if (Size < 32)		if (Size < 32)
return false;		return false;

// 8.1.6 - For Dword or larger reads or writes, the two LSBs of the		// 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
// byte-address are ignored, thus forcing Dword alignment.		// byte-address are ignored, thus forcing Dword alignment.
// This applies to private, global, and constant memory.		// This applies to private, global, and constant memory.
if (IsFast)		if (IsFast)
*IsFast = true;		*IsFast = 1;

return Size >= 32 && Alignment >= Align(4);		return Size >= 32 && Alignment >= Align(4);
}		}

bool SITargetLowering::allowsMisalignedMemoryAccesses(		bool SITargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,		EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *IsFast) const {		unsigned *IsFast) const {
bool Allow = allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,		bool Allow = allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,
Alignment, Flags, IsFast);		Alignment, Flags, IsFast);

if (Allow && IsFast && Subtarget->hasUnalignedDSAccessEnabled() &&		if (Allow && IsFast && Subtarget->hasUnalignedDSAccessEnabled() &&
(AddrSpace == AMDGPUAS::LOCAL_ADDRESS \|\|		(AddrSpace == AMDGPUAS::LOCAL_ADDRESS \|\|
AddrSpace == AMDGPUAS::REGION_ADDRESS)) {		AddrSpace == AMDGPUAS::REGION_ADDRESS)) {
// Lie it is fast if +unaligned-access-mode is passed so that DS accesses		// Lie it is fast if +unaligned-access-mode is passed so that DS accesses
// get vectorized. We could use ds_read2_b/ds_write2_b instructions on a		// get vectorized. We could use ds_read2_b/ds_write2_b instructions on a
// misaligned data which is faster than a pair of ds_read_b/ds_write_b		// misaligned data which is faster than a pair of ds_read_b/ds_write_b
// which would be equally misaligned.		// which would be equally misaligned.
// This is only used by the common passes, selection always calls the		// This is only used by the common passes, selection always calls the
// allowsMisalignedMemoryAccessesImpl version.		// allowsMisalignedMemoryAccessesImpl version.
*IsFast = true;		*IsFast= 1;
}		}

return Allow;		return Allow;
}		}

EVT SITargetLowering::getOptimalMemOpType(		EVT SITargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {		const MemOp &Op, const AttributeList &FuncAttributes) const {
// FIXME: Should account for address space here.		// FIXME: Should account for address space here.
▲ Show 20 Lines • Show All 7,188 Lines • ▼ Show 20 Lines	case 16:
if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())		if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())
return WidenOrSplitVectorLoad(Op, DAG);		return WidenOrSplitVectorLoad(Op, DAG);

return SDValue();		return SDValue();
default:		default:
llvm_unreachable("unsupported private_element_size");		llvm_unreachable("unsupported private_element_size");
}		}
} else if (AS == AMDGPUAS::LOCAL_ADDRESS \|\| AS == AMDGPUAS::REGION_ADDRESS) {		} else if (AS == AMDGPUAS::LOCAL_ADDRESS \|\| AS == AMDGPUAS::REGION_ADDRESS) {
bool Fast = false;		unsigned Fast = 0;
auto Flags = Load->getMemOperand()->getFlags();		auto Flags = Load->getMemOperand()->getFlags();
if (allowsMisalignedMemoryAccessesImpl(MemVT.getSizeInBits(), AS,		if (allowsMisalignedMemoryAccessesImpl(MemVT.getSizeInBits(), AS,
Load->getAlign(), Flags, &Fast) &&		Load->getAlign(), Flags, &Fast) &&
Fast)		Fast)
return SDValue();		return SDValue();

if (MemVT.isVector())		if (MemVT.isVector())
return SplitVectorLoad(Op, DAG);		return SplitVectorLoad(Op, DAG);
▲ Show 20 Lines • Show All 482 Lines • ▼ Show 20 Lines	case 16:
if (NumElements > 4 \|\|		if (NumElements > 4 \|\|
(NumElements == 3 && !Subtarget->enableFlatScratch()))		(NumElements == 3 && !Subtarget->enableFlatScratch()))
return SplitVectorStore(Op, DAG);		return SplitVectorStore(Op, DAG);
return SDValue();		return SDValue();
default:		default:
llvm_unreachable("unsupported private_element_size");		llvm_unreachable("unsupported private_element_size");
}		}
} else if (AS == AMDGPUAS::LOCAL_ADDRESS \|\| AS == AMDGPUAS::REGION_ADDRESS) {		} else if (AS == AMDGPUAS::LOCAL_ADDRESS \|\| AS == AMDGPUAS::REGION_ADDRESS) {
bool Fast = false;		unsigned Fast = 0;
auto Flags = Store->getMemOperand()->getFlags();		auto Flags = Store->getMemOperand()->getFlags();
if (allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AS,		if (allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AS,
Store->getAlign(), Flags, &Fast) &&		Store->getAlign(), Flags, &Fast) &&
Fast)		Fast)
return SDValue();		return SDValue();

if (VT.isVector())		if (VT.isVector())
return SplitVectorStore(Op, DAG);		return SplitVectorStore(Op, DAG);
▲ Show 20 Lines • Show All 3,955 Lines • Show Last 20 Lines

llvm/lib/Target/ARM/ARMISelLowering.h

Show First 20 Lines • Show All 439 Lines • ▼ Show 20 Lines	public:
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;		bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;

/// allowsMisalignedMemoryAccesses - Returns true if the target allows		/// allowsMisalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses of the specified type. Returns whether it		/// unaligned memory accesses of the specified type. Returns whether it
/// is "fast" by reference in the second argument.		/// is "fast" by reference in the second argument.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,		bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
Align Alignment,		Align Alignment,
MachineMemOperand::Flags Flags,		MachineMemOperand::Flags Flags,
bool *Fast) const override;		unsigned *Fast) const override;

EVT getOptimalMemOpType(const MemOp &Op,		EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override;		const AttributeList &FuncAttributes) const override;

bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;		bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;		bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;		bool isZExtFree(SDValue Val, EVT VT2) const override;
bool shouldSinkOperands(Instruction *I,		bool shouldSinkOperands(Instruction *I,
▲ Show 20 Lines • Show All 543 Lines • Show Last 20 Lines

llvm/lib/Target/ARM/ARMISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 18,811 Lines • ▼ Show 20 Lines
bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,		bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
EVT VT) const {		EVT VT) const {
return (VT == MVT::f32) && (Opc == ISD::LOAD \|\| Opc == ISD::STORE);		return (VT == MVT::f32) && (Opc == ISD::LOAD \|\| Opc == ISD::STORE);
}		}

bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,		bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
Align Alignment,		Align Alignment,
MachineMemOperand::Flags,		MachineMemOperand::Flags,
bool *Fast) const {		unsigned *Fast) const {
// Depends what it gets converted into if the type is weird.		// Depends what it gets converted into if the type is weird.
if (!VT.isSimple())		if (!VT.isSimple())
return false;		return false;

// The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus		// The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
bool AllowsUnaligned = Subtarget->allowsUnalignedMem();		bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
auto Ty = VT.getSimpleVT().SimpleTy;		auto Ty = VT.getSimpleVT().SimpleTy;

if (Ty == MVT::i8 \|\| Ty == MVT::i16 \|\| Ty == MVT::i32) {		if (Ty == MVT::i8 \|\| Ty == MVT::i16 \|\| Ty == MVT::i32) {
// Unaligned access can use (for example) LDRB, LDRH, LDR		// Unaligned access can use (for example) LDRB, LDRH, LDR
if (AllowsUnaligned) {		if (AllowsUnaligned) {
if (Fast)		if (Fast)
*Fast = Subtarget->hasV7Ops();		*Fast = Subtarget->hasV7Ops();
return true;		return true;
}		}
}		}

if (Ty == MVT::f64 \|\| Ty == MVT::v2f64) {		if (Ty == MVT::f64 \|\| Ty == MVT::v2f64) {
// For any little-endian targets with neon, we can support unaligned ld/st		// For any little-endian targets with neon, we can support unaligned ld/st
// of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.		// of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
// A big-endian target may also explicitly support unaligned accesses		// A big-endian target may also explicitly support unaligned accesses
if (Subtarget->hasNEON() && (AllowsUnaligned \|\| Subtarget->isLittle())) {		if (Subtarget->hasNEON() && (AllowsUnaligned \|\| Subtarget->isLittle())) {
if (Fast)		if (Fast)
*Fast = true;		*Fast = 1;
return true;		return true;
}		}
}		}

if (!Subtarget->hasMVEIntegerOps())		if (!Subtarget->hasMVEIntegerOps())
return false;		return false;

// These are for predicates		// These are for predicates
if ((Ty == MVT::v16i1 \|\| Ty == MVT::v8i1 \|\| Ty == MVT::v4i1 \|\|		if ((Ty == MVT::v16i1 \|\| Ty == MVT::v8i1 \|\| Ty == MVT::v4i1 \|\|
Ty == MVT::v2i1)) {		Ty == MVT::v2i1)) {
if (Fast)		if (Fast)
*Fast = true;		*Fast = 1;
return true;		return true;
}		}

// These are for truncated stores/narrowing loads. They are fine so long as		// These are for truncated stores/narrowing loads. They are fine so long as
// the alignment is at least the size of the item being loaded		// the alignment is at least the size of the item being loaded
if ((Ty == MVT::v4i8 \|\| Ty == MVT::v8i8 \|\| Ty == MVT::v4i16) &&		if ((Ty == MVT::v4i8 \|\| Ty == MVT::v8i8 \|\| Ty == MVT::v4i16) &&
Alignment >= VT.getScalarSizeInBits() / 8) {		Alignment >= VT.getScalarSizeInBits() / 8) {
if (Fast)		if (Fast)
Show All 9 Lines	bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
//		//
// For big endian, that is not the case. But can still emit a (VSTRB.U8;		// For big endian, that is not the case. But can still emit a (VSTRB.U8;
// VREV64.8) pair and get the same effect. This will likely be better than		// VREV64.8) pair and get the same effect. This will likely be better than
// aligning the vector through the stack.		// aligning the vector through the stack.
if (Ty == MVT::v16i8 \|\| Ty == MVT::v8i16 \|\| Ty == MVT::v8f16 \|\|		if (Ty == MVT::v16i8 \|\| Ty == MVT::v8i16 \|\| Ty == MVT::v8f16 \|\|
Ty == MVT::v4i32 \|\| Ty == MVT::v4f32 \|\| Ty == MVT::v2i64 \|\|		Ty == MVT::v4i32 \|\| Ty == MVT::v4f32 \|\| Ty == MVT::v2i64 \|\|
Ty == MVT::v2f64) {		Ty == MVT::v2f64) {
if (Fast)		if (Fast)
*Fast = true;		*Fast = 1;
return true;		return true;
}		}

return false;		return false;
}		}


EVT ARMTargetLowering::getOptimalMemOpType(		EVT ARMTargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {		const MemOp &Op, const AttributeList &FuncAttributes) const {
// See if we can use NEON instructions for this...		// See if we can use NEON instructions for this...
if ((Op.isMemcpy() \|\| Op.isZeroMemset()) && Subtarget->hasNEON() &&		if ((Op.isMemcpy() \|\| Op.isZeroMemset()) && Subtarget->hasNEON() &&
!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {		!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
bool Fast;		unsigned Fast;
if (Op.size() >= 16 &&		if (Op.size() >= 16 &&
(Op.isAligned(Align(16)) \|\|		(Op.isAligned(Align(16)) \|\|
(allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),		(allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),
MachineMemOperand::MONone, &Fast) &&		MachineMemOperand::MONone, &Fast) &&
Fast))) {		Fast))) {
return MVT::v2f64;		return MVT::v2f64;
} else if (Op.size() >= 8 &&		} else if (Op.size() >= 8 &&
(Op.isAligned(Align(8)) \|\|		(Op.isAligned(Align(8)) \|\|
▲ Show 20 Lines • Show All 3,026 Lines • Show Last 20 Lines

llvm/lib/Target/Hexagon/HexagonISelLowering.h

Show First 20 Lines • Show All 320 Lines • ▼ Show 20 Lines	public:
bool isLegalICmpImmediate(int64_t Imm) const override;		bool isLegalICmpImmediate(int64_t Imm) const override;

EVT getOptimalMemOpType(const MemOp &Op,		EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override;		const AttributeList &FuncAttributes) const override;

bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,		bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
unsigned AddrSpace, Align Alignment,		unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags,		MachineMemOperand::Flags Flags,
bool *Fast) const override;		unsigned *Fast) const override;

bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,		bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
Align Alignment,		Align Alignment,
MachineMemOperand::Flags Flags,		MachineMemOperand::Flags Flags,
bool *Fast) const override;		unsigned *Fast) const override;

/// Returns relocation base for the given PIC jumptable.		/// Returns relocation base for the given PIC jumptable.
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)		SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
const override;		const override;

bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,		bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
EVT NewVT) const override;		EVT NewVT) const override;

▲ Show 20 Lines • Show All 94 Lines • ▼ Show 20 Lines	private:
MVT typeWidenToHvx(MVT Ty) const;		MVT typeWidenToHvx(MVT Ty) const;

SDValue opJoin(const VectorPair &Ops, const SDLoc &dl,		SDValue opJoin(const VectorPair &Ops, const SDLoc &dl,
SelectionDAG &DAG) const;		SelectionDAG &DAG) const;
VectorPair opSplit(SDValue Vec, const SDLoc &dl, SelectionDAG &DAG) const;		VectorPair opSplit(SDValue Vec, const SDLoc &dl, SelectionDAG &DAG) const;
SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const;		SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const;

bool allowsHvxMemoryAccess(MVT VecTy, MachineMemOperand::Flags Flags,		bool allowsHvxMemoryAccess(MVT VecTy, MachineMemOperand::Flags Flags,
bool *Fast) const;		unsigned *Fast) const;
bool allowsHvxMisalignedMemoryAccesses(MVT VecTy,		bool allowsHvxMisalignedMemoryAccesses(MVT VecTy,
MachineMemOperand::Flags Flags,		MachineMemOperand::Flags Flags,
bool *Fast) const;		unsigned *Fast) const;
void AdjustHvxInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const;		void AdjustHvxInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const;

bool isHvxSingleTy(MVT Ty) const;		bool isHvxSingleTy(MVT Ty) const;
bool isHvxPairTy(MVT Ty) const;		bool isHvxPairTy(MVT Ty) const;
bool isHvxBoolTy(MVT Ty) const;		bool isHvxBoolTy(MVT Ty) const;
SDValue convertToByteIndex(SDValue ElemIdx, MVT ElemTy,		SDValue convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
SelectionDAG &DAG) const;		SelectionDAG &DAG) const;
SDValue getIndexInWord32(SDValue Idx, MVT ElemTy, SelectionDAG &DAG) const;		SDValue getIndexInWord32(SDValue Idx, MVT ElemTy, SelectionDAG &DAG) const;
▲ Show 20 Lines • Show All 104 Lines • Show Last 20 Lines

llvm/lib/Target/Hexagon/HexagonISelLowering.cpp

Show First 20 Lines • Show All 3,668 Lines • ▼ Show 20 Lines	if (Op.size() >= 4 && Op.isAligned(Align(4)))
return MVT::i32;		return MVT::i32;
if (Op.size() >= 2 && Op.isAligned(Align(2)))		if (Op.size() >= 2 && Op.isAligned(Align(2)))
return MVT::i16;		return MVT::i16;
return MVT::Other;		return MVT::Other;
}		}

bool HexagonTargetLowering::allowsMemoryAccess(		bool HexagonTargetLowering::allowsMemoryAccess(
LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,		LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {		Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const {
MVT SVT = VT.getSimpleVT();		MVT SVT = VT.getSimpleVT();
if (Subtarget.isHVXVectorType(SVT, true))		if (Subtarget.isHVXVectorType(SVT, true))
return allowsHvxMemoryAccess(SVT, Flags, Fast);		return allowsHvxMemoryAccess(SVT, Flags, Fast);
return TargetLoweringBase::allowsMemoryAccess(		return TargetLoweringBase::allowsMemoryAccess(
Context, DL, VT, AddrSpace, Alignment, Flags, Fast);		Context, DL, VT, AddrSpace, Alignment, Flags, Fast);
}		}

bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(		bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,		EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {		unsigned *Fast) const {
MVT SVT = VT.getSimpleVT();		MVT SVT = VT.getSimpleVT();
if (Subtarget.isHVXVectorType(SVT, true))		if (Subtarget.isHVXVectorType(SVT, true))
return allowsHvxMisalignedMemoryAccesses(SVT, Flags, Fast);		return allowsHvxMisalignedMemoryAccesses(SVT, Flags, Fast);
if (Fast)		if (Fast)
*Fast = false;		*Fast = 0;
return false;		return false;
}		}

std::pair<const TargetRegisterClass*, uint8_t>		std::pair<const TargetRegisterClass*, uint8_t>
HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,		HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
MVT VT) const {		MVT VT) const {
if (Subtarget.isHVXVectorType(VT, true)) {		if (Subtarget.isHVXVectorType(VT, true)) {
unsigned BitWidth = VT.getSizeInBits();		unsigned BitWidth = VT.getSizeInBits();
▲ Show 20 Lines • Show All 105 Lines • Show Last 20 Lines

llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp

	Show First 20 Lines • Show All 566 Lines • ▼ Show 20 Lines

	bool			bool
	HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {			HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
	return Subtarget.isHVXVectorType(Ty, true) &&			return Subtarget.isHVXVectorType(Ty, true) &&
	Ty.getVectorElementType() == MVT::i1;			Ty.getVectorElementType() == MVT::i1;
	}			}

	bool HexagonTargetLowering::allowsHvxMemoryAccess(			bool HexagonTargetLowering::allowsHvxMemoryAccess(
	MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {			MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
	// Bool vectors are excluded by default, but make it explicit to			// Bool vectors are excluded by default, but make it explicit to
	// emphasize that bool vectors cannot be loaded or stored.			// emphasize that bool vectors cannot be loaded or stored.
	// Also, disallow double vector stores (to prevent unnecessary			// Also, disallow double vector stores (to prevent unnecessary
	// store widening in DAG combiner).			// store widening in DAG combiner).
	if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())			if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
	return false;			return false;
	if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))			if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
	return false;			return false;
	if (Fast)			if (Fast)
	*Fast = true;			*Fast = 1;
	return true;			return true;
	}			}

	bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(			bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
	MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {			MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
	if (!Subtarget.isHVXVectorType(VecTy))			if (!Subtarget.isHVXVectorType(VecTy))
	return false;			return false;
	// XXX Should this be false? vmemu are a bit slower than vmem.			// XXX Should this be false? vmemu are a bit slower than vmem.
	if (Fast)			if (Fast)
	*Fast = true;			*Fast = 1;
	return true;			return true;
	}			}

	void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(			void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
	MachineInstr &MI, SDNode *Node) const {			MachineInstr &MI, SDNode *Node) const {
	unsigned Opc = MI.getOpcode();			unsigned Opc = MI.getOpcode();
	const TargetInstrInfo &TII = *Subtarget.getInstrInfo();			const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
	MachineBasicBlock &MB = *MI.getParent();			MachineBasicBlock &MB = *MI.getParent();
	▲ Show 20 Lines • Show All 3,040 Lines • Show Last 20 Lines

llvm/lib/Target/Mips/Mips16ISelLowering.h

Show All 18 Lines	namespace llvm {
class Mips16TargetLowering : public MipsTargetLowering {		class Mips16TargetLowering : public MipsTargetLowering {
public:		public:
explicit Mips16TargetLowering(const MipsTargetMachine &TM,		explicit Mips16TargetLowering(const MipsTargetMachine &TM,
const MipsSubtarget &STI);		const MipsSubtarget &STI);

bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,		bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
Align Alignment,		Align Alignment,
MachineMemOperand::Flags Flags,		MachineMemOperand::Flags Flags,
bool *Fast) const override;		unsigned *Fast) const override;

MachineBasicBlock *		MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,		EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;		MachineBasicBlock *MBB) const override;

private:		private:
bool isEligibleForTailCallOptimization(		bool isEligibleForTailCallOptimization(
const CCState &CCInfo, unsigned NextStackOffset,		const CCState &CCInfo, unsigned NextStackOffset,
▲ Show 20 Lines • Show All 47 Lines • Show Last 20 Lines

llvm/lib/Target/Mips/Mips16ISelLowering.cpp

	Show First 20 Lines • Show All 150 Lines • ▼ Show 20 Lines

	const MipsTargetLowering *			const MipsTargetLowering *
	llvm::createMips16TargetLowering(const MipsTargetMachine &TM,			llvm::createMips16TargetLowering(const MipsTargetMachine &TM,
	const MipsSubtarget &STI) {			const MipsSubtarget &STI) {
	return new Mips16TargetLowering(TM, STI);			return new Mips16TargetLowering(TM, STI);
	}			}

	bool Mips16TargetLowering::allowsMisalignedMemoryAccesses(			bool Mips16TargetLowering::allowsMisalignedMemoryAccesses(
	EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {			EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
	return false;			return false;
	}			}

	MachineBasicBlock *			MachineBasicBlock *
	Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,			Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
	MachineBasicBlock *BB) const {			MachineBasicBlock *BB) const {
	switch (MI.getOpcode()) {			switch (MI.getOpcode()) {
	default:			default:
	▲ Show 20 Lines • Show All 620 Lines • Show Last 20 Lines

llvm/lib/Target/Mips/MipsSEISelLowering.h

Show All 37 Lines	public:
/// Enable MSA support for the given floating-point type and		/// Enable MSA support for the given floating-point type and
/// Register class.		/// Register class.
void addMSAFloatType(MVT::SimpleValueType Ty,		void addMSAFloatType(MVT::SimpleValueType Ty,
const TargetRegisterClass *RC);		const TargetRegisterClass *RC);

bool allowsMisalignedMemoryAccesses(		bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AS = 0, Align Alignment = Align(1),		EVT VT, unsigned AS = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,		MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *Fast = nullptr) const override;		unsigned *Fast = nullptr) const override;

SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;		SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;		SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

MachineBasicBlock *		MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,		EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;		MachineBasicBlock *MBB) const override;
▲ Show 20 Lines • Show All 89 Lines • Show Last 20 Lines

llvm/lib/Target/Mips/MipsSEISelLowering.cpp

Show First 20 Lines • Show All 409 Lines • ▼ Show 20 Lines	SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// floating point register are undefined. Not really an issue as sel.d, which		// floating point register are undefined. Not really an issue as sel.d, which
// is produced from an FSELECT node, only looks at bit 0.		// is produced from an FSELECT node, only looks at bit 0.
SDValue Tmp = DAG.getNode(MipsISD::MTC1_D64, DL, MVT::f64, Op->getOperand(0));		SDValue Tmp = DAG.getNode(MipsISD::MTC1_D64, DL, MVT::f64, Op->getOperand(0));
return DAG.getNode(MipsISD::FSELECT, DL, ResTy, Tmp, Op->getOperand(1),		return DAG.getNode(MipsISD::FSELECT, DL, ResTy, Tmp, Op->getOperand(1),
Op->getOperand(2));		Op->getOperand(2));
}		}

bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(		bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {		EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;		MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;

if (Subtarget.systemSupportsUnalignedAccess()) {		if (Subtarget.systemSupportsUnalignedAccess()) {
// MIPS32r6/MIPS64r6 is required to support unaligned access. It's		// MIPS32r6/MIPS64r6 is required to support unaligned access. It's
// implementation defined whether this is handled by hardware, software, or		// implementation defined whether this is handled by hardware, software, or
// a hybrid of the two but it's expected that most implementations will		// a hybrid of the two but it's expected that most implementations will
// handle the majority of cases in hardware.		// handle the majority of cases in hardware.
if (Fast)		if (Fast)
*Fast = true;		*Fast = 1;
return true;		return true;
}		}

switch (SVT) {		switch (SVT) {
case MVT::i64:		case MVT::i64:
case MVT::i32:		case MVT::i32:
if (Fast)		if (Fast)
*Fast = true;		*Fast = 1;
return true;		return true;
default:		default:
return false;		return false;
}		}
}		}

SDValue MipsSETargetLowering::LowerOperation(SDValue Op,		SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
▲ Show 20 Lines • Show All 3,414 Lines • Show Last 20 Lines

llvm/lib/Target/PowerPC/PPCISelLowering.h

Show First 20 Lines • Show All 1,070 Lines • ▼ Show 20 Lines	public:
EVT getOptimalMemOpType(const MemOp &Op,		EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override;		const AttributeList &FuncAttributes) const override;

/// Is unaligned memory access allowed for the given type, and is it fast		/// Is unaligned memory access allowed for the given type, and is it fast
/// relative to software emulation.		/// relative to software emulation.
bool allowsMisalignedMemoryAccesses(		bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment = Align(1),		EVT VT, unsigned AddrSpace, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,		MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *Fast = nullptr) const override;		unsigned *Fast = nullptr) const override;

/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster		/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be		/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
/// expanded to FMAs when this method returns true, otherwise fmuladd is		/// expanded to FMAs when this method returns true, otherwise fmuladd is
/// expanded to fmul + fadd.		/// expanded to fmul + fadd.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,		bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const override;		EVT VT) const override;

▲ Show 20 Lines • Show All 413 Lines • Show Last 20 Lines

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 16,705 Lines • ▼ Show 20 Lines
}		}

bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {		bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
return isInt<16>(Imm) \|\| isUInt<16>(Imm);		return isInt<16>(Imm) \|\| isUInt<16>(Imm);
}		}

bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,		bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
MachineMemOperand::Flags,		MachineMemOperand::Flags,
bool *Fast) const {		unsigned *Fast) const {
if (DisablePPCUnaligned)		if (DisablePPCUnaligned)
return false;		return false;

// PowerPC supports unaligned memory access for simple non-vector types.		// PowerPC supports unaligned memory access for simple non-vector types.
// Although accessing unaligned addresses is not as efficient as accessing		// Although accessing unaligned addresses is not as efficient as accessing
// aligned addresses, it is generally more efficient than manual expansion,		// aligned addresses, it is generally more efficient than manual expansion,
// and generally only traps for software emulation when crossing page		// and generally only traps for software emulation when crossing page
// boundaries.		// boundaries.
Show All 14 Lines	if (Subtarget.hasVSX()) {
return false;		return false;
}		}
}		}

if (VT == MVT::ppcf128)		if (VT == MVT::ppcf128)
return false;		return false;

if (Fast)		if (Fast)
*Fast = true;		*Fast = 1;

return true;		return true;
}		}

bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,		bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
SDValue C) const {		SDValue C) const {
// Check integral scalar types.		// Check integral scalar types.
if (!VT.isScalarInteger())		if (!VT.isScalarInteger())
▲ Show 20 Lines • Show All 1,502 Lines • Show Last 20 Lines

llvm/lib/Target/RISCV/RISCVISelLowering.h

Show First 20 Lines • Show All 537 Lines • ▼ Show 20 Lines	Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,
Value *NewVal, Value *Mask,		Value *NewVal, Value *Mask,
AtomicOrdering Ord) const override;		AtomicOrdering Ord) const override;

/// Returns true if the target allows unaligned memory accesses of the		/// Returns true if the target allows unaligned memory accesses of the
/// specified type.		/// specified type.
bool allowsMisalignedMemoryAccesses(		bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),		EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,		MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *Fast = nullptr) const override;		unsigned *Fast = nullptr) const override;

bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL,		bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL,
SDValue Val, SDValue *Parts,		SDValue Val, SDValue *Parts,
unsigned NumParts, MVT PartVT,		unsigned NumParts, MVT PartVT,
Optional<CallingConv::ID> CC) const override;		Optional<CallingConv::ID> CC) const override;

SDValue		SDValue
joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,		joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
▲ Show 20 Lines • Show All 198 Lines • Show Last 20 Lines

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 13,260 Lines • ▼ Show 20 Lines	if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
return false;		return false;

// Default to true and let the DAGCombiner decide.		// Default to true and let the DAGCombiner decide.
return true;		return true;
}		}

bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(		bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,		EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {		unsigned *Fast) const {
if (!VT.isVector()) {		if (!VT.isVector()) {
if (Fast)		if (Fast)
*Fast = false;		*Fast = 0;
return Subtarget.enableUnalignedScalarMem();		return Subtarget.enableUnalignedScalarMem();
}		}

// All vector implementations must support element alignment		// All vector implementations must support element alignment
EVT ElemVT = VT.getVectorElementType();		EVT ElemVT = VT.getVectorElementType();
if (Alignment >= ElemVT.getStoreSize()) {		if (Alignment >= ElemVT.getStoreSize()) {
if (Fast)		if (Fast)
*Fast = true;		*Fast = 1;
return true;		return true;
}		}

return false;		return false;
}		}

bool RISCVTargetLowering::splitValueIntoRegisterParts(		bool RISCVTargetLowering::splitValueIntoRegisterParts(
SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,		SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
▲ Show 20 Lines • Show All 130 Lines • Show Last 20 Lines

llvm/lib/Target/SystemZ/SystemZISelLowering.h

Show First 20 Lines • Show All 449 Lines • ▼ Show 20 Lines	public:
bool hasInlineStackProbe(const MachineFunction &MF) const override;		bool hasInlineStackProbe(const MachineFunction &MF) const override;
bool isLegalICmpImmediate(int64_t Imm) const override;		bool isLegalICmpImmediate(int64_t Imm) const override;
bool isLegalAddImmediate(int64_t Imm) const override;		bool isLegalAddImmediate(int64_t Imm) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,		bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS,		unsigned AS,
Instruction *I = nullptr) const override;		Instruction *I = nullptr) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,		bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
MachineMemOperand::Flags Flags,		MachineMemOperand::Flags Flags,
bool *Fast) const override;		unsigned *Fast) const override;
bool		bool
findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,		findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
const MemOp &Op, unsigned DstAS, unsigned SrcAS,		const MemOp &Op, unsigned DstAS, unsigned SrcAS,
const AttributeList &FuncAttributes) const override;		const AttributeList &FuncAttributes) const override;
EVT getOptimalMemOpType(const MemOp &Op,		EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override;		const AttributeList &FuncAttributes) const override;
bool isTruncateFree(Type *, Type *) const override;		bool isTruncateFree(Type *, Type *) const override;
bool isTruncateFree(EVT, EVT) const override;		bool isTruncateFree(EVT, EVT) const override;
▲ Show 20 Lines • Show All 326 Lines • Show Last 20 Lines

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 855 Lines • ▼ Show 20 Lines
	}			}

	bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {			bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
	// We can use ALGFI or SLGFI.			// We can use ALGFI or SLGFI.
	return isUInt<32>(Imm) || isUInt<32>(-Imm);			return isUInt<32>(Imm) || isUInt<32>(-Imm);
	}			}

	bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(			bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
	EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {			EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
	// Unaligned accesses should never be slower than the expanded version.			// Unaligned accesses should never be slower than the expanded version.
	// We check specifically for aligned accesses in the few cases where			// We check specifically for aligned accesses in the few cases where
	// they are required.			// they are required.
	if (Fast)			if (Fast)
	*Fast = true;			*Fast = 1;
	return true;			return true;
	}			}

	// Information about the addressing mode for a memory access.			// Information about the addressing mode for a memory access.
	struct AddressingMode {			struct AddressingMode {
	// True if a long displacement is supported.			// True if a long displacement is supported.
	bool LongDisplacement;			bool LongDisplacement;

	▲ Show 20 Lines • Show All 8,162 Lines • Show Last 20 Lines

llvm/lib/Target/VE/VEISelLowering.h

Show First 20 Lines • Show All 217 Lines • ▼ Show 20 Lines	public:

bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;		bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,		bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;		bool ForCodeSize) const override;
/// Returns true if the target allows unaligned memory accesses of the		/// Returns true if the target allows unaligned memory accesses of the
/// specified type.		/// specified type.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align A,		bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align A,
MachineMemOperand::Flags Flags,		MachineMemOperand::Flags Flags,
bool *Fast) const override;		unsigned *Fast) const override;

/// Inline Assembly {		/// Inline Assembly {

ConstraintType getConstraintType(StringRef Constraint) const override;		ConstraintType getConstraintType(StringRef Constraint) const override;
std::pair<unsigned, const TargetRegisterClass *>		std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,		getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;		StringRef Constraint, MVT VT) const override;

Show All 23 Lines

llvm/lib/Target/VE/VEISelLowering.cpp

	Show First 20 Lines • Show All 881 Lines • ▼ Show 20 Lines
	/// reference. This is used, for example, in situations where an array			/// reference. This is used, for example, in situations where an array
	/// copy/move/set is converted to a sequence of store operations. Its use			/// copy/move/set is converted to a sequence of store operations. Its use
	/// helps to ensure that such replacements don't generate code that causes an			/// helps to ensure that such replacements don't generate code that causes an
	/// alignment error (trap) on the target machine.			/// alignment error (trap) on the target machine.
	bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,			bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
	unsigned AddrSpace,			unsigned AddrSpace,
	Align A,			Align A,
	MachineMemOperand::Flags,			MachineMemOperand::Flags,
	bool *Fast) const {			unsigned *Fast) const {
	if (Fast) {			if (Fast) {
	// It's fast anytime on VE			// It's fast anytime on VE
	*Fast = true;			*Fast = 1;
	}			}
	return true;			return true;
	}			}

	VETargetLowering::VETargetLowering(const TargetMachine &TM,			VETargetLowering::VETargetLowering(const TargetMachine &TM,
	const VESubtarget &STI)			const VESubtarget &STI)
	: TargetLowering(TM), Subtarget(&STI) {			: TargetLowering(TM), Subtarget(&STI) {
	// Instructions which use registers as conditionals examine all the			// Instructions which use registers as conditionals examine all the
	▲ Show 20 Lines • Show All 2,318 Lines • Show Last 20 Lines

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h

Show First 20 Lines • Show All 66 Lines • ▼ Show 20 Lines	getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;		StringRef Constraint, MVT VT) const override;
bool isCheapToSpeculateCttz(Type *Ty) const override;		bool isCheapToSpeculateCttz(Type *Ty) const override;
bool isCheapToSpeculateCtlz(Type *Ty) const override;		bool isCheapToSpeculateCtlz(Type *Ty) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,		bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS,		unsigned AS,
Instruction *I = nullptr) const override;		Instruction *I = nullptr) const override;
bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, Align Alignment,		bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags,		MachineMemOperand::Flags Flags,
bool *Fast) const override;		unsigned *Fast) const override;
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;		bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;		bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;		bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,		EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;		EVT VT) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,		bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
MachineFunction &MF,		MachineFunction &MF,
unsigned Intrinsic) const override;		unsigned Intrinsic) const override;
▲ Show 20 Lines • Show All 78 Lines • Show Last 20 Lines

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Show First 20 Lines • Show All 775 Lines • ▼ Show 20 Lines	if (AM.Scale != 0)
return false;		return false;

// Everything else is legal.		// Everything else is legal.
return true;		return true;
}		}

bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(		bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,		EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
MachineMemOperand::Flags /*Flags*/, bool *Fast) const {		MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
// WebAssembly supports unaligned accesses, though it should be declared		// WebAssembly supports unaligned accesses, though it should be declared
// with the p2align attribute on loads and stores which do so, and there		// with the p2align attribute on loads and stores which do so, and there
// may be a performance impact. We tell LLVM they're "fast" because		// may be a performance impact. We tell LLVM they're "fast" because
// for the kinds of things that LLVM uses this for (merging adjacent stores		// for the kinds of things that LLVM uses this for (merging adjacent stores
// of constants, etc.), WebAssembly implementations will either want the		// of constants, etc.), WebAssembly implementations will either want the
// unaligned access or they'll split anyway.		// unaligned access or they'll split anyway.
if (Fast)		if (Fast)
*Fast = true;		*Fast = 1;
return true;		return true;
}		}

bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,		bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
AttributeList Attr) const {		AttributeList Attr) const {
// The current thinking is that wasm engines will perform this optimization,		// The current thinking is that wasm engines will perform this optimization,
// so we can save on code size.		// so we can save on code size.
return true;		return true;
▲ Show 20 Lines • Show All 1,851 Lines • Show Last 20 Lines

llvm/lib/Target/X86/X86ISelLowering.h

Show First 20 Lines • Show All 999 Lines • ▼ Show 20 Lines	public:
/// also does type conversion. Note the specified type doesn't have to be		/// also does type conversion. Note the specified type doesn't have to be
/// legal as the hook is used before type legalization.		/// legal as the hook is used before type legalization.
bool isSafeMemOpType(MVT VT) const override;		bool isSafeMemOpType(MVT VT) const override;

/// Returns true if the target allows unaligned memory accesses of the		/// Returns true if the target allows unaligned memory accesses of the
/// specified type. Returns whether it is "fast" in the last argument.		/// specified type. Returns whether it is "fast" in the last argument.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,		bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
MachineMemOperand::Flags Flags,		MachineMemOperand::Flags Flags,
bool *Fast) const override;		unsigned *Fast) const override;

/// Provide custom lowering hooks for some operations.		/// Provide custom lowering hooks for some operations.
///		///
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;		SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

/// Replace the results of node with an illegal result		/// Replace the results of node with an illegal result
/// type with new values built out of custom code.		/// type with new values built out of custom code.
///		///
▲ Show 20 Lines • Show All 808 Lines • Show Last 20 Lines

llvm/lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 2,724 Lines • ▼ Show 20 Lines	if (VT == MVT::f32)
return Subtarget.hasSSE1();		return Subtarget.hasSSE1();
if (VT == MVT::f64)		if (VT == MVT::f64)
return Subtarget.hasSSE2();		return Subtarget.hasSSE2();
return true;		return true;
}		}

bool X86TargetLowering::allowsMisalignedMemoryAccesses(		bool X86TargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,		EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {		unsigned *Fast) const {
if (Fast) {		if (Fast) {
switch (VT.getSizeInBits()) {		switch (VT.getSizeInBits()) {
default:		default:
// 8-byte and under are always assumed to be fast.		// 8-byte and under are always assumed to be fast.
*Fast = true;		*Fast = 1;
break;		break;
case 128:		case 128:
*Fast = !Subtarget.isUnalignedMem16Slow();		*Fast = !Subtarget.isUnalignedMem16Slow();
break;		break;
case 256:		case 256:
*Fast = !Subtarget.isUnalignedMem32Slow();		*Fast = !Subtarget.isUnalignedMem32Slow();
break;		break;
// TODO: What about AVX-512 (512-bit) accesses?		// TODO: What about AVX-512 (512-bit) accesses?
▲ Show 20 Lines • Show All 46,876 Lines • ▼ Show 20 Lines	static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
EVT MemVT = Ld->getMemoryVT();		EVT MemVT = Ld->getMemoryVT();
SDLoc dl(Ld);		SDLoc dl(Ld);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();		const TargetLowering &TLI = DAG.getTargetLoweringInfo();

// For chips with slow 32-byte unaligned loads, break the 32-byte operation		// For chips with slow 32-byte unaligned loads, break the 32-byte operation
// into two 16-byte operations. Also split non-temporal aligned loads on		// into two 16-byte operations. Also split non-temporal aligned loads on
// pre-AVX2 targets as 32-byte loads will lower to regular temporal loads.		// pre-AVX2 targets as 32-byte loads will lower to regular temporal loads.
ISD::LoadExtType Ext = Ld->getExtensionType();		ISD::LoadExtType Ext = Ld->getExtensionType();
bool Fast;		unsigned Fast;
if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() &&		if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() &&
Ext == ISD::NON_EXTLOAD &&		Ext == ISD::NON_EXTLOAD &&
((Ld->isNonTemporal() && !Subtarget.hasInt256() &&		((Ld->isNonTemporal() && !Subtarget.hasInt256() &&
Ld->getAlign() >= Align(16)) ||		Ld->getAlign() >= Align(16)) ||
(TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT,		(TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT,
*Ld->getMemOperand(), &Fast) &&		*Ld->getMemOperand(), &Fast) &&
!Fast))) {		!Fast))) {
unsigned NumElems = RegVT.getVectorNumElements();		unsigned NumElems = RegVT.getVectorNumElements();
▲ Show 20 Lines • Show All 441 Lines • ▼ Show 20 Lines	if ((VT == MVT::v8i1 || VT == MVT::v16i1 || VT == MVT::v32i1 ||
StoredVal = combinevXi1ConstantToInteger(StoredVal, DAG);		StoredVal = combinevXi1ConstantToInteger(StoredVal, DAG);
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),		return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
St->getPointerInfo(), St->getOriginalAlign(),		St->getPointerInfo(), St->getOriginalAlign(),
St->getMemOperand()->getFlags());		St->getMemOperand()->getFlags());
}		}

// If we are saving a 32-byte vector and 32-byte stores are slow, such as on		// If we are saving a 32-byte vector and 32-byte stores are slow, such as on
// Sandy Bridge, perform two 16-byte stores.		// Sandy Bridge, perform two 16-byte stores.
bool Fast;		unsigned Fast;
if (VT.is256BitVector() && StVT == VT &&		if (VT.is256BitVector() && StVT == VT &&
TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,		TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
*St->getMemOperand(), &Fast) &&		*St->getMemOperand(), &Fast) &&
!Fast) {		!Fast) {
unsigned NumElems = VT.getVectorNumElements();		unsigned NumElems = VT.getVectorNumElements();
if (NumElems < 2)		if (NumElems < 2)
return SDValue();		return SDValue();

▲ Show 20 Lines • Show All 4,719 Lines • ▼ Show 20 Lines	case X86ISD::BLENDV:
}		}
break;		break;
}		}
}		}

// Fold subvector loads into one.		// Fold subvector loads into one.
// If needed, look through bitcasts to get to the load.		// If needed, look through bitcasts to get to the load.
if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(Op0))) {		if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(Op0))) {
bool Fast;		unsigned Fast;
const X86TargetLowering *TLI = Subtarget.getTargetLowering();		const X86TargetLowering *TLI = Subtarget.getTargetLowering();
if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,		if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
*FirstLd->getMemOperand(), &Fast) &&		*FirstLd->getMemOperand(), &Fast) &&
Fast) {		Fast) {
if (SDValue Ld =		if (SDValue Ld =
EltsFromConsecutiveLoads(VT, Ops, DL, DAG, Subtarget, false))		EltsFromConsecutiveLoads(VT, Ops, DL, DAG, Subtarget, false))
return Ld;		return Ld;
}		}
▲ Show 20 Lines • Show All 2,358 Lines • Show Last 20 Lines

llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Show First 20 Lines • Show All 769 Lines • ▼ Show 20 Lines	static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,

IntegerType *WiderType = IntegerType::get(I.getContext(), LOps.LoadSize);		IntegerType *WiderType = IntegerType::get(I.getContext(), LOps.LoadSize);
// TTI based checks if we want to proceed with wider load		// TTI based checks if we want to proceed with wider load
bool Allowed = TTI.isTypeLegal(WiderType);		bool Allowed = TTI.isTypeLegal(WiderType);
if (!Allowed)		if (!Allowed)
return false;		return false;

unsigned AS = LI1->getPointerAddressSpace();		unsigned AS = LI1->getPointerAddressSpace();
bool Fast = false;		unsigned Fast = 0;
Allowed = TTI.allowsMisalignedMemoryAccesses(I.getContext(), LOps.LoadSize,		Allowed = TTI.allowsMisalignedMemoryAccesses(I.getContext(), LOps.LoadSize,
AS, LI1->getAlign(), &Fast);		AS, LI1->getAlign(), &Fast);
if (!Allowed || !Fast)		if (!Allowed || !Fast)
return false;		return false;

// New load can be generated		// New load can be generated
Value *Load1Ptr = LI1->getPointerOperand();		Value *Load1Ptr = LI1->getPointerOperand();
Builder.SetInsertPoint(LI1);		Builder.SetInsertPoint(LI1);
▲ Show 20 Lines • Show All 92 Lines • Show Last 20 Lines

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

Show First 20 Lines • Show All 1,314 Lines • ▼ Show 20 Lines	bool Vectorizer::vectorizeLoadChain(
return true;		return true;
}		}

bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,		bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
Align Alignment) {		Align Alignment) {
if (Alignment.value() % SzInBytes == 0)		if (Alignment.value() % SzInBytes == 0)
return false;		return false;

bool Fast = false;		unsigned Fast = 0;
		[Inline comment, marked Done] mariusz-sikora-at-amd: false -> 0
		[Inline reply, marked Done] rampitec (author): Thanks!
bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(),		bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(),
SzInBytes * 8, AddressSpace,		SzInBytes * 8, AddressSpace,
Alignment, &Fast);		Alignment, &Fast);
LLVM_DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows		LLVM_DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows
<< " and fast? " << Fast << "\n";);		<< " and fast? " << Fast << "\n";);
return !Allows || !Fast;		return !Allows || !Fast;
}		}

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Allow finer grain control of an unaligned access speed (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 476159

llvm/include/llvm/Analysis/TargetTransformInfo.h

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

llvm/include/llvm/CodeGen/BasicTTIImpl.h

llvm/include/llvm/CodeGen/TargetLowering.h

llvm/lib/Analysis/TargetTransformInfo.cpp

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

llvm/lib/CodeGen/TargetLoweringBase.cpp

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

llvm/lib/Target/AMDGPU/R600ISelLowering.h

llvm/lib/Target/AMDGPU/R600ISelLowering.cpp

llvm/lib/Target/AMDGPU/SIISelLowering.h

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

llvm/lib/Target/ARM/ARMISelLowering.h

llvm/lib/Target/ARM/ARMISelLowering.cpp

llvm/lib/Target/Hexagon/HexagonISelLowering.h

llvm/lib/Target/Hexagon/HexagonISelLowering.cpp

llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp

llvm/lib/Target/Mips/Mips16ISelLowering.h

llvm/lib/Target/Mips/Mips16ISelLowering.cpp

llvm/lib/Target/Mips/MipsSEISelLowering.h

llvm/lib/Target/Mips/MipsSEISelLowering.cpp

llvm/lib/Target/PowerPC/PPCISelLowering.h

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

llvm/lib/Target/RISCV/RISCVISelLowering.h

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

llvm/lib/Target/SystemZ/SystemZISelLowering.h

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

llvm/lib/Target/VE/VEISelLowering.h

llvm/lib/Target/VE/VEISelLowering.cpp

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

llvm/lib/Target/X86/X86ISelLowering.h

llvm/lib/Target/X86/X86ISelLowering.cpp

llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

[AMDGPU] Allow finer grain control of an unaligned access speed
ClosedPublic