Diff 66821

llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h

Show First 20 Lines • Show All 383 Lines • ▼ Show 20 Lines	public:
/// floating-point semantics may differ. For example, ARM NEON v7 SIMD math		/// floating-point semantics may differ. For example, ARM NEON v7 SIMD math
/// does not support IEEE-754 denormal numbers, while depending on the		/// does not support IEEE-754 denormal numbers, while depending on the
/// platform, scalar floating-point math does.		/// platform, scalar floating-point math does.
/// This applies to floating-point math operations and calls, not memory		/// This applies to floating-point math operations and calls, not memory
/// operations, shuffles, or casts.		/// operations, shuffles, or casts.
bool isFPVectorizationPotentiallyUnsafe() const;		bool isFPVectorizationPotentiallyUnsafe() const;

/// \brief Determine if the target supports unaligned memory accesses.		/// \brief Determine if the target supports unaligned memory accesses.
bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace = 0,		bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
		unsigned BitWidth, unsigned AddressSpace = 0,
unsigned Alignment = 1,		unsigned Alignment = 1,
bool *Fast = nullptr) const;		bool *Fast = nullptr) const;

/// \brief Return hardware support for population count.		/// \brief Return hardware support for population count.
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;		PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

/// \brief Return true if the hardware has a fast square-root instruction.		/// \brief Return true if the hardware has a fast square-root instruction.
bool haveFastSqrt(Type *Ty) const;		bool haveFastSqrt(Type *Ty) const;
▲ Show 20 Lines • Show All 262 Lines • ▼ Show 20 Lines	public:
virtual bool isProfitableToHoist(Instruction *I) = 0;		virtual bool isProfitableToHoist(Instruction *I) = 0;
virtual bool isTypeLegal(Type *Ty) = 0;		virtual bool isTypeLegal(Type *Ty) = 0;
virtual unsigned getJumpBufAlignment() = 0;		virtual unsigned getJumpBufAlignment() = 0;
virtual unsigned getJumpBufSize() = 0;		virtual unsigned getJumpBufSize() = 0;
virtual bool shouldBuildLookupTables() = 0;		virtual bool shouldBuildLookupTables() = 0;
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;		virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
virtual bool enableInterleavedAccessVectorization() = 0;		virtual bool enableInterleavedAccessVectorization() = 0;
virtual bool isFPVectorizationPotentiallyUnsafe() = 0;		virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
virtual bool allowsMisalignedMemoryAccesses(unsigned BitWidth,		virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
		unsigned BitWidth,
unsigned AddressSpace,		unsigned AddressSpace,
unsigned Alignment,		unsigned Alignment,
bool *Fast) = 0;		bool *Fast) = 0;
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;		virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
virtual bool haveFastSqrt(Type *Ty) = 0;		virtual bool haveFastSqrt(Type *Ty) = 0;
virtual int getFPOpCost(Type *Ty) = 0;		virtual int getFPOpCost(Type *Ty) = 0;
virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,		virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
Type *Ty) = 0;		Type *Ty) = 0;
▲ Show 20 Lines • Show All 156 Lines • ▼ Show 20 Lines	bool enableAggressiveInterleaving(bool LoopHasReductions) override {
return Impl.enableAggressiveInterleaving(LoopHasReductions);		return Impl.enableAggressiveInterleaving(LoopHasReductions);
}		}
bool enableInterleavedAccessVectorization() override {		bool enableInterleavedAccessVectorization() override {
return Impl.enableInterleavedAccessVectorization();		return Impl.enableInterleavedAccessVectorization();
}		}
bool isFPVectorizationPotentiallyUnsafe() override {		bool isFPVectorizationPotentiallyUnsafe() override {
return Impl.isFPVectorizationPotentiallyUnsafe();		return Impl.isFPVectorizationPotentiallyUnsafe();
}		}
bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,		bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
		unsigned BitWidth, unsigned AddressSpace,
unsigned Alignment, bool *Fast) override {		unsigned Alignment, bool *Fast) override {
return Impl.allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,		return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
Alignment, Fast);		Alignment, Fast);
}		}
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {		PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
return Impl.getPopcntSupport(IntTyWidthInBit);		return Impl.getPopcntSupport(IntTyWidthInBit);
}		}
bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }		bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }

int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }		int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
▲ Show 20 Lines • Show All 228 Lines • Show Last 20 Lines

llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h

Show First 20 Lines • Show All 247 Lines • ▼ Show 20 Lines	public:
bool shouldBuildLookupTables() { return true; }		bool shouldBuildLookupTables() { return true; }

bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }		bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }

bool enableInterleavedAccessVectorization() { return false; }		bool enableInterleavedAccessVectorization() { return false; }

bool isFPVectorizationPotentiallyUnsafe() { return false; }		bool isFPVectorizationPotentiallyUnsafe() { return false; }

bool allowsMisalignedMemoryAccesses(unsigned BitWidth,		bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
		unsigned BitWidth,
unsigned AddressSpace,		unsigned AddressSpace,
unsigned Alignment,		unsigned Alignment,
bool *Fast) { return false; }		bool *Fast) { return false; }

TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {		TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
return TTI::PSK_Software;		return TTI::PSK_Software;
}		}

▲ Show 20 Lines • Show All 286 Lines • Show Last 20 Lines

llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h

Show First 20 Lines • Show All 99 Lines • ▼ Show 20 Lines	public:
// Provide value semantics. MSVC requires that we spell all of these out.		// Provide value semantics. MSVC requires that we spell all of these out.
BasicTTIImplBase(const BasicTTIImplBase &Arg)		BasicTTIImplBase(const BasicTTIImplBase &Arg)
: BaseT(static_cast<const BaseT &>(Arg)) {}		: BaseT(static_cast<const BaseT &>(Arg)) {}
BasicTTIImplBase(BasicTTIImplBase &&Arg)		BasicTTIImplBase(BasicTTIImplBase &&Arg)
: BaseT(std::move(static_cast<BaseT &>(Arg))) {}		: BaseT(std::move(static_cast<BaseT &>(Arg))) {}

/// \name Scalar TTI Implementations		/// \name Scalar TTI Implementations
/// @{		/// @{
bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,		bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
		unsigned BitWidth, unsigned AddressSpace,
unsigned Alignment, bool *Fast) const {		unsigned Alignment, bool *Fast) const {
MVT M = MVT::getIntegerVT(BitWidth);		EVT E = EVT::getIntegerVT(Context, BitWidth);
return getTLI()->allowsMisalignedMemoryAccesses(M, AddressSpace, Alignment, Fast);		return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment, Fast);
}		}

bool hasBranchDivergence() { return false; }		bool hasBranchDivergence() { return false; }

bool isSourceOfDivergence(const Value *V) { return false; }		bool isSourceOfDivergence(const Value *V) { return false; }

bool isLegalAddImmediate(int64_t imm) {		bool isLegalAddImmediate(int64_t imm) {
return getTLI()->isLegalAddImmediate(imm);		return getTLI()->isLegalAddImmediate(imm);
▲ Show 20 Lines • Show All 846 Lines • Show Last 20 Lines

llvm/trunk/lib/Analysis/TargetTransformInfo.cpp

	Show First 20 Lines • Show All 180 Lines • ▼ Show 20 Lines
	bool TargetTransformInfo::enableInterleavedAccessVectorization() const {			bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
	return TTIImpl->enableInterleavedAccessVectorization();			return TTIImpl->enableInterleavedAccessVectorization();
	}			}

	bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {			bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
	return TTIImpl->isFPVectorizationPotentiallyUnsafe();			return TTIImpl->isFPVectorizationPotentiallyUnsafe();
	}			}

	bool TargetTransformInfo::allowsMisalignedMemoryAccesses(unsigned BitWidth,			bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
				unsigned BitWidth,
	unsigned AddressSpace,			unsigned AddressSpace,
	unsigned Alignment,			unsigned Alignment,
	bool *Fast) const {			bool *Fast) const {
	return TTIImpl->allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,			return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
	Alignment, Fast);			Alignment, Fast);
	}			}

	TargetTransformInfo::PopcntSupportKind			TargetTransformInfo::PopcntSupportKind
	TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {			TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
	return TTIImpl->getPopcntSupport(IntTyWidthInBit);			return TTIImpl->getPopcntSupport(IntTyWidthInBit);
	}			}

	▲ Show 20 Lines • Show All 267 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp

Show First 20 Lines • Show All 433 Lines • ▼ Show 20 Lines	bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned AddrSpace,		unsigned AddrSpace,
unsigned Align,		unsigned Align,
bool *IsFast) const {		bool *IsFast) const {
if (IsFast)		if (IsFast)
*IsFast = false;		*IsFast = false;

// TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,		// TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
// which isn't a simple VT.		// which isn't a simple VT.
if (!VT.isSimple() \|\| VT == MVT::Other)		// Until MVT is extended to handle this, simply check for the size and
		// rely on the condition below: allow accesses if the size is a multiple of 4.
		if (VT == MVT::Other \|\| (VT != MVT::Other && VT.getSizeInBits() > 1024 &&
		VT.getStoreSize() > 16)) {
return false;		return false;
		}

if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS \|\|		if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS \|\|
AddrSpace == AMDGPUAS::REGION_ADDRESS) {		AddrSpace == AMDGPUAS::REGION_ADDRESS) {
// ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte		// ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
// aligned, 8 byte access in a single operation using ds_read2/write2_b32		// aligned, 8 byte access in a single operation using ds_read2/write2_b32
// with adjacent offsets.		// with adjacent offsets.
bool AlignedBy4 = (Align % 4 == 0);		bool AlignedBy4 = (Align % 4 == 0);
if (IsFast)		if (IsFast)
▲ Show 20 Lines • Show All 3,334 Lines • Show Last 20 Lines

llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

Show All 34 Lines
using namespace llvm;		using namespace llvm;

#define DEBUG_TYPE "load-store-vectorizer"		#define DEBUG_TYPE "load-store-vectorizer"
STATISTIC(NumVectorInstructions, "Number of vector accesses generated");		STATISTIC(NumVectorInstructions, "Number of vector accesses generated");
STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");		STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");

namespace {		namespace {

// TODO: Remove this		// FIXME: Assuming stack alignment of 4 is always good enough
static const unsigned TargetBaseAlign = 4;		static const unsigned StackAdjustedAlignment = 4;

typedef SmallVector<Instruction *, 8> InstrList;		typedef SmallVector<Instruction *, 8> InstrList;
typedef MapVector<Value *, InstrList> InstrListMap;		typedef MapVector<Value *, InstrList> InstrListMap;

class Vectorizer {		class Vectorizer {
Function &F;		Function &F;
AliasAnalysis &AA;		AliasAnalysis &AA;
DominatorTree &DT;		DominatorTree &DT;
ScalarEvolution &SE;		ScalarEvolution &SE;
▲ Show 20 Lines • Show All 739 Lines • ▼ Show 20 Lines	bool Vectorizer::vectorizeStoreChain(
if (accessIsMisaligned(SzInBytes, AS, Alignment)) {		if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
if (S0->getPointerAddressSpace() != 0)		if (S0->getPointerAddressSpace() != 0)
return false;		return false;

// If we're storing to an object on the stack, we control its alignment,		// If we're storing to an object on the stack, we control its alignment,
// so we can cheat and change it!		// so we can cheat and change it!
Value *V = GetUnderlyingObject(S0->getPointerOperand(), DL);		Value *V = GetUnderlyingObject(S0->getPointerOperand(), DL);
if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V)) {		if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V)) {
AI->setAlignment(TargetBaseAlign);		AI->setAlignment(StackAdjustedAlignment);
Alignment = TargetBaseAlign;		Alignment = StackAdjustedAlignment;
} else {		} else {
return false;		return false;
}		}
}		}

BasicBlock::iterator First, Last;		BasicBlock::iterator First, Last;
std::tie(First, Last) = getBoundaryInstrs(Chain);		std::tie(First, Last) = getBoundaryInstrs(Chain);
Builder.SetInsertPoint(&*Last);		Builder.SetInsertPoint(&*Last);
▲ Show 20 Lines • Show All 132 Lines • ▼ Show 20 Lines	bool Vectorizer::vectorizeLoadChain(
if (accessIsMisaligned(SzInBytes, AS, Alignment)) {		if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
if (L0->getPointerAddressSpace() != 0)		if (L0->getPointerAddressSpace() != 0)
return false;		return false;

// If we're loading from an object on the stack, we control its alignment,		// If we're loading from an object on the stack, we control its alignment,
// so we can cheat and change it!		// so we can cheat and change it!
Value *V = GetUnderlyingObject(L0->getPointerOperand(), DL);		Value *V = GetUnderlyingObject(L0->getPointerOperand(), DL);
if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V)) {		if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V)) {
AI->setAlignment(TargetBaseAlign);		AI->setAlignment(StackAdjustedAlignment);
Alignment = TargetBaseAlign;		Alignment = StackAdjustedAlignment;
} else {		} else {
return false;		return false;
}		}
}		}

DEBUG({		DEBUG({
dbgs() << "LSV: Loads to vectorize:\n";		dbgs() << "LSV: Loads to vectorize:\n";
for (Instruction *I : Chain)		for (Instruction *I : Chain)
▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines	bool Vectorizer::vectorizeLoadChain(

++NumVectorInstructions;		++NumVectorInstructions;
NumScalarsVectorized += Chain.size();		NumScalarsVectorized += Chain.size();
return true;		return true;
}		}

bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,		bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
unsigned Alignment) {		unsigned Alignment) {
		if (Alignment % SzInBytes == 0)
		return false;
bool Fast = false;		bool Fast = false;
bool Allows = TTI.allowsMisalignedMemoryAccesses(SzInBytes * 8, AddressSpace,		bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(),
		SzInBytes * 8, AddressSpace,
Alignment, &Fast);		Alignment, &Fast);
// TODO: Remove TargetBaseAlign		DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows
return !(Allows && Fast) && (Alignment % SzInBytes) != 0 &&		<< " and fast? " << Fast << "\n";);
(Alignment % TargetBaseAlign) != 0;		return !Allows \|\| !Fast;
}		}

llvm/trunk/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll

	Show All 16 Lines

	; CHECK-LABEL: @load_fn			; CHECK-LABEL: @load_fn
	; CHECK: load			; CHECK: load
	; CHECK: call void @fn()			; CHECK: call void @fn()
	; CHECK: load			; CHECK: load
	define void @load_fn(i32* %p) #0 {			define void @load_fn(i32* %p) #0 {
	%p.1 = getelementptr i32, i32* %p, i32 1			%p.1 = getelementptr i32, i32* %p, i32 1

	%v0 = load i32, i32* %p			%v0 = load i32, i32* %p, align 8
	call void @fn()			call void @fn()
	%v1 = load i32, i32* %p.1			%v1 = load i32, i32* %p.1, align 4
	ret void			ret void
	}			}

	; CHECK-LABEL: @load_fn_nounwind			; CHECK-LABEL: @load_fn_nounwind
	; CHECK: load			; CHECK: load
	; CHECK: call void @fn_nounwind()			; CHECK: call void @fn_nounwind()
	; CHECK: load			; CHECK: load
	define void @load_fn_nounwind(i32* %p) #0 {			define void @load_fn_nounwind(i32* %p) #0 {
	%p.1 = getelementptr i32, i32* %p, i32 1			%p.1 = getelementptr i32, i32* %p, i32 1

	%v0 = load i32, i32* %p			%v0 = load i32, i32* %p, align 8
	call void @fn_nounwind() #0			call void @fn_nounwind() #0
	%v1 = load i32, i32* %p.1			%v1 = load i32, i32* %p.1, align 4
	ret void			ret void
	}			}

	; CHECK-LABEL: @load_fn_nounwind_writeonly			; CHECK-LABEL: @load_fn_nounwind_writeonly
	; CHECK: load			; CHECK: load
	; CHECK: call void @fn_nounwind_writeonly()			; CHECK: call void @fn_nounwind_writeonly()
	; CHECK: load			; CHECK: load
	define void @load_fn_nounwind_writeonly(i32* %p) #0 {			define void @load_fn_nounwind_writeonly(i32* %p) #0 {
	%p.1 = getelementptr i32, i32* %p, i32 1			%p.1 = getelementptr i32, i32* %p, i32 1

	%v0 = load i32, i32* %p			%v0 = load i32, i32* %p, align 8
	call void @fn_nounwind_writeonly() #1			call void @fn_nounwind_writeonly() #1
	%v1 = load i32, i32* %p.1			%v1 = load i32, i32* %p.1, align 4
	ret void			ret void
	}			}

	; CHECK-LABEL: @load_fn_nounwind_readonly			; CHECK-LABEL: @load_fn_nounwind_readonly
	; CHECK-DAG: load <2 x i32>			; CHECK-DAG: load <2 x i32>
	; CHECK-DAG: call void @fn_nounwind_readonly()			; CHECK-DAG: call void @fn_nounwind_readonly()
	define void @load_fn_nounwind_readonly(i32* %p) #0 {			define void @load_fn_nounwind_readonly(i32* %p) #0 {
	%p.1 = getelementptr i32, i32* %p, i32 1			%p.1 = getelementptr i32, i32* %p, i32 1

	%v0 = load i32, i32* %p			%v0 = load i32, i32* %p, align 8
	call void @fn_nounwind_readonly() #2			call void @fn_nounwind_readonly() #2
	%v1 = load i32, i32* %p.1			%v1 = load i32, i32* %p.1, align 4
	ret void			ret void
	}			}

	; CHECK-LABEL: @load_fn_readonly			; CHECK-LABEL: @load_fn_readonly
	; CHECK: load			; CHECK: load
	; CHECK: call void @fn_readonly			; CHECK: call void @fn_readonly
	; CHECK: load			; CHECK: load
	define void @load_fn_readonly(i32* %p) #0 {			define void @load_fn_readonly(i32* %p) #0 {
	%p.1 = getelementptr i32, i32* %p, i32 1			%p.1 = getelementptr i32, i32* %p, i32 1

	%v0 = load i32, i32* %p			%v0 = load i32, i32* %p, align 8
	call void @fn_readonly() #4			call void @fn_readonly() #4
	%v1 = load i32, i32* %p.1			%v1 = load i32, i32* %p.1, align 4
	ret void			ret void
	}			}

	; CHECK-LABEL: @load_fn_writeonly			; CHECK-LABEL: @load_fn_writeonly
	; CHECK: load			; CHECK: load
	; CHECK: call void @fn_writeonly()			; CHECK: call void @fn_writeonly()
	; CHECK: load			; CHECK: load
	define void @load_fn_writeonly(i32* %p) #0 {			define void @load_fn_writeonly(i32* %p) #0 {
	%p.1 = getelementptr i32, i32* %p, i32 1			%p.1 = getelementptr i32, i32* %p, i32 1

	%v0 = load i32, i32* %p			%v0 = load i32, i32* %p, align 8
	call void @fn_writeonly() #3			call void @fn_writeonly() #3
	%v1 = load i32, i32* %p.1			%v1 = load i32, i32* %p.1, align 4
	ret void			ret void
	}			}

	; CHECK-LABEL: @load_fn_readnone			; CHECK-LABEL: @load_fn_readnone
	; CHECK-DAG: load <2 x i32>			; CHECK-DAG: load <2 x i32>
	; CHECK-DAG: call void @fn_readnone()			; CHECK-DAG: call void @fn_readnone()
	define void @load_fn_readnone(i32* %p) #0 {			define void @load_fn_readnone(i32* %p) #0 {
	%p.1 = getelementptr i32, i32* %p, i32 1			%p.1 = getelementptr i32, i32* %p, i32 1

	%v0 = load i32, i32* %p			%v0 = load i32, i32* %p, align 8
	call void @fn_readnone() #5			call void @fn_readnone() #5
	%v1 = load i32, i32* %p.1			%v1 = load i32, i32* %p.1, align 4
	ret void			ret void
	}			}

	; ------------------------------------------------			; ------------------------------------------------
	; Same tests, but now for stores instead of loads.			; Same tests, but now for stores instead of loads.
	; ------------------------------------------------			; ------------------------------------------------

	; CHECK-LABEL: @store_fn			; CHECK-LABEL: @store_fn
	▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines

	; This is the only store idiom we can vectorize.			; This is the only store idiom we can vectorize.
	; CHECK-LABEL: @store_fn_readnone			; CHECK-LABEL: @store_fn_readnone
	; CHECK-DAG: store <2 x i32>			; CHECK-DAG: store <2 x i32>
	; CHECK-DAG: call void @fn_readnone()			; CHECK-DAG: call void @fn_readnone()
	define void @store_fn_readnone(i32* %p) #0 {			define void @store_fn_readnone(i32* %p) #0 {
	%p.1 = getelementptr i32, i32* %p, i32 1			%p.1 = getelementptr i32, i32* %p, i32 1

	store i32 0, i32* %p			store i32 0, i32* %p, align 8
	call void @fn_readnone() #5			call void @fn_readnone() #5
	store i32 0, i32* %p.1			store i32 0, i32* %p.1, align 8
	ret void			ret void
	}			}


	attributes #0 = { nounwind }			attributes #0 = { nounwind }
	attributes #1 = { nounwind writeonly }			attributes #1 = { nounwind writeonly }
	attributes #2 = { nounwind readonly }			attributes #2 = { nounwind readonly }
	attributes #3 = { writeonly }			attributes #3 = { writeonly }
	attributes #4 = { readonly }			attributes #4 = { readonly }
	; readnone implies nounwind, so no need to test separately			; readnone implies nounwind, so no need to test separately
	attributes #5 = { nounwind readnone }			attributes #5 = { nounwind readnone }

llvm/trunk/test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll

	; RUN: opt -mtriple=nvptx64-nvidia-cuda -load-store-vectorizer -S -o - %s \| FileCheck %s			; RUN: opt -mtriple=nvptx64-nvidia-cuda -load-store-vectorizer -S -o - %s \| FileCheck %s

	; Load from a constant. This can be vectorized, but shouldn't crash us.			; Load from a constant. This can be vectorized, but shouldn't crash us.

	@global = internal addrspace(1) constant [4 x float] [float 0xBF71111120000000, float 0x3F70410420000000, float 0xBF81111120000000, float 0x3FB5555560000000], align 4			@global = internal addrspace(1) constant [4 x float] [float 0xBF71111120000000, float 0x3F70410420000000, float 0xBF81111120000000, float 0x3FB5555560000000], align 4

	define void @foo() {			define void @foo() {
	; CHECK: load <4 x float>			; CHECK: load <4 x float>
	%a = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 0), align 4			%a = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 0), align 16
	%b = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 1), align 4			%b = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 1), align 4
	%c = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 2), align 4			%c = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 2), align 4
	%d = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 3), align 4			%d = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 3), align 4
	ret void			ret void
	}			}

llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll

	; RUN: opt -mtriple=x86-linux -load-store-vectorizer -S -o - %s \| FileCheck %s			; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s \| FileCheck %s

	target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"			target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

	; CHECK-LABEL: @correct_order(			; CHECK-LABEL: @correct_order(
	; CHECK: [[LOAD_PTR:%[0-9]+]] = bitcast i32* %next.gep1			; CHECK: [[LOAD_PTR:%[0-9]+]] = bitcast i32* %next.gep1
	; CHECK: load <2 x i32>, <2 x i32>* [[LOAD_PTR]]			; CHECK: load <2 x i32>, <2 x i32>* [[LOAD_PTR]]
	; CHECK: load i32, i32* %next.gep			; CHECK: load i32, i32* %next.gep
	; CHECK: [[STORE_PTR:%[0-9]+]] = bitcast i32* %next.gep			; CHECK: [[STORE_PTR:%[0-9]+]] = bitcast i32* %next.gep
	Show All 18 Lines

llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll

	Show All 11 Lines

	; CHECK-LABEL: @preserve_order_32(			; CHECK-LABEL: @preserve_order_32(
	; CHECK: load <2 x i32>			; CHECK: load <2 x i32>
	; CHECK: %buff.val = load i8			; CHECK: %buff.val = load i8
	; CHECK: store i8 0			; CHECK: store i8 0
	define void @preserve_order_32(%struct.buffer_t* noalias %buff) #0 {			define void @preserve_order_32(%struct.buffer_t* noalias %buff) #0 {
	entry:			entry:
	%tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 1			%tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 1
	%buff.p = load i8, i8* %tmp1, align 8			%buff.p = load i8, i8* %tmp1
	%buff.val = load i8, i8* %buff.p, align 8			%buff.val = load i8, i8* %buff.p
	store i8 0, i8* %buff.p, align 8			store i8 0, i8* %buff.p, align 8
	%tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 0			%tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 0
	%buff.int = load i32, i32* %tmp0, align 8			%buff.int = load i32, i32* %tmp0, align 8
	ret void			ret void
	}			}

	attributes #0 = { nounwind }			attributes #0 = { nounwind }

llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll

	; RUN: opt -mtriple=x86-linux -load-store-vectorizer -S -o - %s \| FileCheck %s			; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s \| FileCheck %s

	target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"			target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

	%struct.buffer_t = type { i64, i8* }			%struct.buffer_t = type { i64, i8* }
	%struct.nested.buffer = type { %struct.buffer_t, %struct.buffer_t }			%struct.nested.buffer = type { %struct.buffer_t, %struct.buffer_t }

	; Check an i64 and i8* get vectorized, and that the two accesses			; Check an i64 and i8* get vectorized, and that the two accesses
	; (load into buff.val and store to buff.p) preserve their order.			; (load into buff.val and store to buff.p) preserve their order.
	; Vectorized loads should be inserted at the position of the first load,			; Vectorized loads should be inserted at the position of the first load,
	; and instructions which were between the first and last load should be			; and instructions which were between the first and last load should be
	; reordered preserving their relative order inasmuch as possible.			; reordered preserving their relative order inasmuch as possible.

	; CHECK-LABEL: @preserve_order_64(			; CHECK-LABEL: @preserve_order_64(
	; CHECK: load <2 x i64>			; CHECK: load <2 x i64>
	; CHECK: %buff.val = load i8			; CHECK: %buff.val = load i8
	; CHECK: store i8 0			; CHECK: store i8 0
	define void @preserve_order_64(%struct.buffer_t* noalias %buff) #0 {			define void @preserve_order_64(%struct.buffer_t* noalias %buff) #0 {
	entry:			entry:
	%tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 1			%tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 1
	%buff.p = load i8, i8* %tmp1, align 8			%buff.p = load i8, i8* %tmp1
	%buff.val = load i8, i8* %buff.p, align 8			%buff.val = load i8, i8* %buff.p
	store i8 0, i8* %buff.p, align 8			store i8 0, i8* %buff.p, align 8
	%tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 0			%tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 0
	%buff.int = load i64, i64* %tmp0, align 8			%buff.int = load i64, i64* %tmp0, align 16
	ret void			ret void
	}			}

	; Check reordering recurses correctly.			; Check reordering recurses correctly.

	; CHECK-LABEL: @transitive_reorder(			; CHECK-LABEL: @transitive_reorder(
	; CHECK: load <2 x i64>			; CHECK: load <2 x i64>
	; CHECK: %buff.val = load i8			; CHECK: %buff.val = load i8
	; CHECK: store i8 0			; CHECK: store i8 0
	define void @transitive_reorder(%struct.buffer_t* noalias %buff, %struct.nested.buffer* noalias %nest) #0 {			define void @transitive_reorder(%struct.buffer_t* noalias %buff, %struct.nested.buffer* noalias %nest) #0 {
	entry:			entry:
	%nest0_0 = getelementptr inbounds %struct.nested.buffer, %struct.nested.buffer* %nest, i64 0, i32 0			%nest0_0 = getelementptr inbounds %struct.nested.buffer, %struct.nested.buffer* %nest, i64 0, i32 0
	%tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %nest0_0, i64 0, i32 1			%tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %nest0_0, i64 0, i32 1
	%buff.p = load i8, i8* %tmp1, align 8			%buff.p = load i8, i8* %tmp1
	%buff.val = load i8, i8* %buff.p, align 8			%buff.val = load i8, i8* %buff.p
	store i8 0, i8* %buff.p, align 8			store i8 0, i8* %buff.p, align 8
	%nest1_0 = getelementptr inbounds %struct.nested.buffer, %struct.nested.buffer* %nest, i64 0, i32 0			%nest1_0 = getelementptr inbounds %struct.nested.buffer, %struct.nested.buffer* %nest, i64 0, i32 0
	%tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %nest1_0, i64 0, i32 0			%tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %nest1_0, i64 0, i32 0
	%buff.int = load i64, i64* %tmp0, align 8			%buff.int = load i64, i64* %tmp0, align 16
	ret void			ret void
	}			}

	; Check for no vectorization over phi node			; Check for no vectorization over phi node

	; CHECK-LABEL: @no_vect_phi(			; CHECK-LABEL: @no_vect_phi(
	; CHECK: load i8*			; CHECK: load i8*
	; CHECK: load i8			; CHECK: load i8
	; CHECK: store i8 0			; CHECK: store i8 0
	; CHECK: load i64			; CHECK: load i64
	define void @no_vect_phi(i32* noalias %ptr, %struct.buffer_t* noalias %buff) {			define void @no_vect_phi(i32* noalias %ptr, %struct.buffer_t* noalias %buff) {
	entry:			entry:
	%tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 1			%tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 1
	%buff.p = load i8, i8* %tmp1, align 8			%buff.p = load i8, i8* %tmp1
	%buff.val = load i8, i8* %buff.p, align 8			%buff.val = load i8, i8* %buff.p
	store i8 0, i8* %buff.p, align 8			store i8 0, i8* %buff.p, align 8
	br label %"for something"			br label %"for something"

	"for something":			"for something":
	%index = phi i64 [ 0, %entry ], [ %index.next, %"for something" ]			%index = phi i64 [ 0, %entry ], [ %index.next, %"for something" ]

	%tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 0			%tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 0
	%buff.int = load i64, i64* %tmp0, align 8			%buff.int = load i64, i64* %tmp0, align 16

	%index.next = add i64 %index, 8			%index.next = add i64 %index, 8
	%cmp_res = icmp eq i64 %index.next, 8			%cmp_res = icmp eq i64 %index.next, 8
	br i1 %cmp_res, label %ending, label %"for something"			br i1 %cmp_res, label %ending, label %"for something"

	ending:			ending:
	ret void			ret void
	}			}

	attributes #0 = { nounwind }			attributes #0 = { nounwind }

llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll

	; RUN: opt -mtriple=x86-linux -load-store-vectorizer -S -o - %s \| FileCheck %s			; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s \| FileCheck %s

	target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"			target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

	; Vectorized subsets of the load/store chains in the presence of			; Vectorized subsets of the load/store chains in the presence of
	; interleaved loads/stores			; interleaved loads/stores

	; CHECK-LABEL: @interleave_2L_2S(			; CHECK-LABEL: @interleave_2L_2S(
	; CHECK: load <2 x i32>			; CHECK: load <2 x i32>
	▲ Show 20 Lines • Show All 78 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

LoadStoreVectorizer: Remove TargetBaseAlign. Keep alignment for stack adjustments.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 66821

llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h

llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h

llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h

llvm/trunk/lib/Analysis/TargetTransformInfo.cpp

llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp

llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

llvm/trunk/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll

llvm/trunk/test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll

llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll

llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll

llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll

llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll

This is an archive of the discontinued LLVM Phabricator instance.

LoadStoreVectorizer: Remove TargetBaseAlign. Keep alignment for stack adjustments.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 66821

llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h

llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h

llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h

llvm/trunk/lib/Analysis/TargetTransformInfo.cpp

llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp

llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

llvm/trunk/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll

llvm/trunk/test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll

llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll

llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll

llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll

llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll

LoadStoreVectorizer: Remove TargetBaseAlign. Keep alignment for stack adjustments.
ClosedPublic