Diff 414143

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Show First 20 Lines • Show All 387 Lines • ▼ Show 20 Lines	uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
if (MFI.hasPrivateSegmentBuffer()) {		if (MFI.hasPrivateSegmentBuffer()) {
KernelCodeProperties \|=		KernelCodeProperties \|=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;		amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}		}
if (MFI.hasDispatchPtr()) {		if (MFI.hasDispatchPtr()) {
KernelCodeProperties \|=		KernelCodeProperties \|=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;		amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
}		}
if (MFI.hasQueuePtr()) {		if (MFI.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) {
KernelCodeProperties \|=		KernelCodeProperties \|=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;		amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
}		}
if (MFI.hasKernargSegmentPtr()) {		if (MFI.hasKernargSegmentPtr()) {
KernelCodeProperties \|=		KernelCodeProperties \|=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;		amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
}		}
if (MFI.hasDispatchID()) {		if (MFI.hasDispatchID()) {
▲ Show 20 Lines • Show All 680 Lines • ▼ Show 20 Lines	void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
if (MFI->hasPrivateSegmentBuffer()) {		if (MFI->hasPrivateSegmentBuffer()) {
Out.code_properties \|=		Out.code_properties \|=
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;		AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}		}

if (MFI->hasDispatchPtr())		if (MFI->hasDispatchPtr())
Out.code_properties \|= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;		Out.code_properties \|= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;

if (MFI->hasQueuePtr())		if (MFI->hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5)
Out.code_properties \|= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;		Out.code_properties \|= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;

if (MFI->hasKernargSegmentPtr())		if (MFI->hasKernargSegmentPtr())
Out.code_properties \|= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;		Out.code_properties \|= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;

if (MFI->hasDispatchID())		if (MFI->hasDispatchID())
Out.code_properties \|= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;		Out.code_properties \|= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;

▲ Show 20 Lines • Show All 67 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Show First 20 Lines • Show All 44 Lines • ▼ Show 20 Lines
};		};

// We do not need to note the x workitem or workgroup id because they are always		// We do not need to note the x workitem or workgroup id because they are always
// initialized.		// initialized.
//		//
// TODO: We should not add the attributes if the known compile time workgroup		// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.		// size is 1 for y/z.
static ImplicitArgumentMask		static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) {		intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
		bool HasApertureRegs, bool SupportsGetDoorBellID) {
		unsigned CodeObjectVersion = AMDGPU::getAmdhsaCodeObjectVersion();
		arsenmUnsubmitted Not Done Reply Inline Actions This really ought to be something read from the IR arsenm: This really ought to be something read from the IR
		cfangAuthorUnsubmitted Done Reply Inline Actions As discussed in another review (https://reviews.llvm.org/D119027 "IIUC the global opt var is the best we have right now, and any improvement to that situation is orthogonal to this change. I would vote that this not block the patch under review!"), we are using the global opt var for this change. cfang: As discussed in another review (https://reviews.llvm.org/D119027 "IIUC the global opt var is…
switch (ID) {		switch (ID) {
case Intrinsic::amdgcn_workitem_id_x:		case Intrinsic::amdgcn_workitem_id_x:
NonKernelOnly = true;		NonKernelOnly = true;
return WORKITEM_ID_X;		return WORKITEM_ID_X;
case Intrinsic::amdgcn_workgroup_id_x:		case Intrinsic::amdgcn_workgroup_id_x:
NonKernelOnly = true;		NonKernelOnly = true;
return WORKGROUP_ID_X;		return WORKGROUP_ID_X;
case Intrinsic::amdgcn_workitem_id_y:		case Intrinsic::amdgcn_workitem_id_y:
Show All 9 Lines	intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
case Intrinsic::r600_read_tgid_z:		case Intrinsic::r600_read_tgid_z:
return WORKGROUP_ID_Z;		return WORKGROUP_ID_Z;
case Intrinsic::amdgcn_dispatch_ptr:		case Intrinsic::amdgcn_dispatch_ptr:
return DISPATCH_PTR;		return DISPATCH_PTR;
case Intrinsic::amdgcn_dispatch_id:		case Intrinsic::amdgcn_dispatch_id:
return DISPATCH_ID;		return DISPATCH_ID;
case Intrinsic::amdgcn_implicitarg_ptr:		case Intrinsic::amdgcn_implicitarg_ptr:
return IMPLICIT_ARG_PTR;		return IMPLICIT_ARG_PTR;
		// Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
		// queue_ptr.
case Intrinsic::amdgcn_queue_ptr:		case Intrinsic::amdgcn_queue_ptr:
		NeedsImplicit = (CodeObjectVersion == 5);
		return QUEUE_PTR;
case Intrinsic::amdgcn_is_shared:		case Intrinsic::amdgcn_is_shared:
case Intrinsic::amdgcn_is_private:		case Intrinsic::amdgcn_is_private:
// TODO: Does not require the queue pointer on gfx9+		if (HasApertureRegs)
		return NOT_IMPLICIT_INPUT;
		// Under V5, we need implicitarg_ptr + offsets to access private_base or
		// shared_base. For pre-V5, however, need to access them through queue_ptr +
		// offsets.
		return CodeObjectVersion == 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR;
case Intrinsic::trap:		case Intrinsic::trap:
case Intrinsic::debugtrap:		if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
IsQueuePtr = true;		return CodeObjectVersion >= 4 ? NOT_IMPLICIT_INPUT : QUEUE_PTR;
		NeedsImplicit = (CodeObjectVersion == 5); // Need implicitarg_ptr under V5.
return QUEUE_PTR;		return QUEUE_PTR;
default:		default:
return NOT_IMPLICIT_INPUT;		return NOT_IMPLICIT_INPUT;
}		}
}		}

static bool castRequiresQueuePtr(unsigned SrcAS) {		static bool castRequiresQueuePtr(unsigned SrcAS) {
return SrcAS == AMDGPUAS::LOCAL_ADDRESS \|\| SrcAS == AMDGPUAS::PRIVATE_ADDRESS;		return SrcAS == AMDGPUAS::LOCAL_ADDRESS \|\| SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
Show All 30 Lines	public:
enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };		enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

/// Check if the subtarget has aperture regs.		/// Check if the subtarget has aperture regs.
bool hasApertureRegs(Function &F) {		bool hasApertureRegs(Function &F) {
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);		const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
return ST.hasApertureRegs();		return ST.hasApertureRegs();
}		}

		/// Check if the subtarget supports GetDoorbellID.
		/// Looks up the GCNSubtarget for \p F through TM and forwards the result of
		/// its supportsGetDoorbellID() query.
		bool supportsGetDoorbellID(Function &F) {
		const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
		return ST.supportsGetDoorbellID();
		}

std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {		std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);		const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
return ST.getFlatWorkGroupSizes(F);		return ST.getFlatWorkGroupSizes(F);
}		}

std::pair<unsigned, unsigned>		std::pair<unsigned, unsigned>
getMaximumFlatWorkGroupRange(const Function &F) {		getMaximumFlatWorkGroupRange(const Function &F) {
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);		const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
▲ Show 20 Lines • Show All 236 Lines • ▼ Show 20 Lines	ChangeStatus updateImpl(Attributor &A) override {
// Check for Intrinsics and propagate attributes.		// Check for Intrinsics and propagate attributes.
const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(		const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
*this, this->getIRPosition(), DepClassTy::REQUIRED);		*this, this->getIRPosition(), DepClassTy::REQUIRED);
if (AAEdges.hasNonAsmUnknownCallee())		if (AAEdges.hasNonAsmUnknownCallee())
return indicatePessimisticFixpoint();		return indicatePessimisticFixpoint();

bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());		bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

bool NeedsQueuePtr = false;		bool NeedsImplicit = false;
		auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
		bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
		bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);

for (Function *Callee : AAEdges.getOptimisticEdges()) {		for (Function *Callee : AAEdges.getOptimisticEdges()) {
Intrinsic::ID IID = Callee->getIntrinsicID();		Intrinsic::ID IID = Callee->getIntrinsicID();
if (IID == Intrinsic::not_intrinsic) {		if (IID == Intrinsic::not_intrinsic) {
const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(		const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
this, IRPosition::function(Callee), DepClassTy::REQUIRED);		this, IRPosition::function(Callee), DepClassTy::REQUIRED);
*this &= AAAMD;		*this &= AAAMD;
continue;		continue;
}		}

bool NonKernelOnly = false;		bool NonKernelOnly = false;
ImplicitArgumentMask AttrMask =		ImplicitArgumentMask AttrMask =
intrinsicToAttrMask(IID, NonKernelOnly, NeedsQueuePtr);		intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
		HasApertureRegs, SupportsGetDoorbellID);
if (AttrMask != NOT_IMPLICIT_INPUT) {		if (AttrMask != NOT_IMPLICIT_INPUT) {
if ((IsNonEntryFunc \|\| !NonKernelOnly))		if ((IsNonEntryFunc \|\| !NonKernelOnly))
removeAssumedBits(AttrMask);		removeAssumedBits(AttrMask);
}		}
}		}

if (!NeedsQueuePtr) {		// Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
		arsenmUnsubmitted Not Done Reply Inline Actions Typo acess arsenm: Typo acess
NeedsQueuePtr = checkForQueuePtr(A);		if (NeedsImplicit)
}		removeAssumedBits(IMPLICIT_ARG_PTR);

if (NeedsQueuePtr) {		if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
		arsenmUnsubmitted Not Done Reply Inline Actions This should recognize both the intrinsic and load from the specific offset from the implicitarg ptr, similar to the new hostcall handling. We still should be able to infer no queue ptr with it in memory arsenm: This should recognize both the intrinsic and load from the specific offset from the implicitarg…
		cfangAuthorUnsubmitted Done Reply Inline Actions This is different from the case of hostcall handling. We are handling aperture bases in the backend. We do not have explicit intrinsic call for implicitarg ptr. cfang: This is different from the case of hostcall handling. We are handling aperture bases in the…
		arsenmUnsubmitted Not Done Reply Inline Actions It's not different because some subtargets still use the queue pointer from here (pre gfx9) arsenm: It's not different because some subtargets still use the queue pointer from here (pre gfx9)
		cfangAuthorUnsubmitted Not Done Reply Inline Actions I know some subtargets still use the queue pointer. However, you suggest we use similar approach as we handle hostcall. But we actually have the different case. For hostcall, we are using implicitarg_ptr + offset, but for aperture bases, we do not have implicitarg_ptr intrinsic call at all. cfang: I know some subtargets still use the queue pointer. However, you suggest we use similar…
		arsenmUnsubmitted Not Done Reply Inline Actions The logical queue pointer value still exists and we can infer that it's not needed, just like hostcall in this case arsenm: The logical queue pointer value still exists and we can infer that it's not needed, just like…
		arsenmUnsubmitted Done Reply Inline Actions We should still be tracking the logical queue pointer arsenm: We should still be tracking the logical queue pointer
		cfangAuthorUnsubmitted Done Reply Inline Actions Can you be explicit what is "logical queue pointer" here? And why do we need to trace it? cfang: Can you be explicit what is "logical queue pointer" here? And why do we need to trace it?
		// Under V5, we need implicitarg_ptr + offsets to access private_base or
		// shared_base. We do not actually need queue_ptr.
		if (AMDGPU::getAmdhsaCodeObjectVersion() == 5)
		removeAssumedBits(IMPLICIT_ARG_PTR);
		else
removeAssumedBits(QUEUE_PTR);		removeAssumedBits(QUEUE_PTR);
}		}

if (funcRetrievesHostcallPtr(A)) {		if (funcRetrievesHostcallPtr(A)) {
assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");		assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
removeAssumedBits(HOSTCALL_PTR);		removeAssumedBits(HOSTCALL_PTR);
}		}

if (funcRetrievesHeapPtr(A)) {		if (funcRetrievesHeapPtr(A)) {
assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");		assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
removeAssumedBits(HEAP_PTR);		removeAssumedBits(HEAP_PTR);
}		}

		if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) {
		assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
		removeAssumedBits(QUEUE_PTR);
		}

return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED		return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
: ChangeStatus::UNCHANGED;		: ChangeStatus::UNCHANGED;
}		}

ChangeStatus manifest(Attributor &A) override {		ChangeStatus manifest(Attributor &A) override {
SmallVector<Attribute, 8> AttrList;		SmallVector<Attribute, 8> AttrList;
LLVMContext &Ctx = getAssociatedFunction()->getContext();		LLVMContext &Ctx = getAssociatedFunction()->getContext();

▲ Show 20 Lines • Show All 80 Lines • ▼ Show 20 Lines	private:
bool funcRetrievesHeapPtr(Attributor &A) {		bool funcRetrievesHeapPtr(Attributor &A) {
if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)		if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
return false;		return false;
auto Pos = llvm::AMDGPU::getHeapPtrImplicitArgPosition();		auto Pos = llvm::AMDGPU::getHeapPtrImplicitArgPosition();
AAPointerInfo::OffsetAndSize OAS(Pos, 8);		AAPointerInfo::OffsetAndSize OAS(Pos, 8);
return funcRetrievesImplicitKernelArg(A, OAS);		return funcRetrievesImplicitKernelArg(A, OAS);
}		}

		/// Check whether the function retrieves the queue pointer from the implicit
		/// kernel arguments. Always returns false unless the code object version is
		/// 5; otherwise defers to funcRetrievesImplicitKernelArg() for the 8-byte
		/// range starting at getQueuePtrImplicitArgPosition().
		bool funcRetrievesQueuePtr(Attributor &A) {
		// Only code object version 5 is handled here; every other version
		// reports "not retrieved".
		if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
		return false;
		auto Pos = llvm::AMDGPU::getQueuePtrImplicitArgPosition();
		// Offset Pos, size 8 bytes.
		AAPointerInfo::OffsetAndSize OAS(Pos, 8);
		return funcRetrievesImplicitKernelArg(A, OAS);
		}

bool funcRetrievesImplicitKernelArg(Attributor &A,		bool funcRetrievesImplicitKernelArg(Attributor &A,
AAPointerInfo::OffsetAndSize OAS) {		AAPointerInfo::OffsetAndSize OAS) {
// Check if this is a call to the implicitarg_ptr builtin and it		// Check if this is a call to the implicitarg_ptr builtin and it
// is used to retrieve the hostcall pointer. The implicit arg for		// is used to retrieve the hostcall pointer. The implicit arg for
// hostcall is not used only if every use of the implicitarg_ptr		// hostcall is not used only if every use of the implicitarg_ptr
// is a load that clearly does not retrieve any byte of the		// is a load that clearly does not retrieve any byte of the
// hostcall pointer. We check this by tracing all the uses of the		// hostcall pointer. We check this by tracing all the uses of the
// initial call to the implicitarg_ptr intrinsic.		// initial call to the implicitarg_ptr intrinsic.
▲ Show 20 Lines • Show All 198 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

Show First 20 Lines • Show All 447 Lines • ▼ Show 20 Lines	static void allocateHSAUserSGPRs(CCState &CCInfo,
}		}

if (Info.hasDispatchPtr()) {		if (Info.hasDispatchPtr()) {
Register DispatchPtrReg = Info.addDispatchPtr(TRI);		Register DispatchPtrReg = Info.addDispatchPtr(TRI);
MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);		MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchPtrReg);		CCInfo.AllocateReg(DispatchPtrReg);
}		}

if (Info.hasQueuePtr()) {		if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) {
Register QueuePtrReg = Info.addQueuePtr(TRI);		Register QueuePtrReg = Info.addQueuePtr(TRI);
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);		MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);		CCInfo.AllocateReg(QueuePtrReg);
}		}

if (Info.hasKernargSegmentPtr()) {		if (Info.hasKernargSegmentPtr()) {
MachineRegisterInfo &MRI = MF.getRegInfo();		MachineRegisterInfo &MRI = MF.getRegInfo();
Register InputPtrReg = Info.addKernargSegmentPtr(TRI);		Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
▲ Show 20 Lines • Show All 951 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

Show First 20 Lines • Show All 1,037 Lines • ▼ Show 20 Lines	emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
Args);		Args);
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,		emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
Args);		Args);
} else		} else
Offset += 16; // Skipped.		Offset += 16; // Skipped.

Offset += 72; // Reserved.		Offset += 72; // Reserved.

// hidden_private_base and hidden_shared_base are only used by GFX8.		// hidden_private_base and hidden_shared_base are only used when the subtarget
if (ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) {		// does not have ApertureRegs.
		if (!ST.hasApertureRegs()) {
emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);		emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);		emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
} else		} else
Offset += 8; // Skipped.		Offset += 8; // Skipped.

if (MFI.hasQueuePtr())		if (MFI.hasQueuePtr())
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);		emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
}		}

} // end namespace HSAMD		} // end namespace HSAMD
} // end namespace AMDGPU		} // end namespace AMDGPU
} // end namespace llvm		} // end namespace llvm

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 2,109 Lines • ▼ Show 20 Lines	void SITargetLowering::allocateSpecialInputSGPRs(
const SIRegisterInfo &TRI,		const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) const {		SIMachineFunctionInfo &Info) const {
auto &ArgInfo = Info.getArgInfo();		auto &ArgInfo = Info.getArgInfo();

// TODO: Unify handling with private memory pointers.		// TODO: Unify handling with private memory pointers.
if (Info.hasDispatchPtr())		if (Info.hasDispatchPtr())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);		allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);

if (Info.hasQueuePtr())		if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5)
allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);		allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);

// Implicit arg ptr takes the place of the kernarg segment pointer. This is a		// Implicit arg ptr takes the place of the kernarg segment pointer. This is a
// constant offset from the kernarg segment.		// constant offset from the kernarg segment.
if (Info.hasImplicitArgPtr())		if (Info.hasImplicitArgPtr())
allocateSGPR64Input(CCInfo, ArgInfo.ImplicitArgPtr);		allocateSGPR64Input(CCInfo, ArgInfo.ImplicitArgPtr);

if (Info.hasDispatchID())		if (Info.hasDispatchID())
Show All 30 Lines	void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
}		}

if (Info.hasDispatchPtr()) {		if (Info.hasDispatchPtr()) {
Register DispatchPtrReg = Info.addDispatchPtr(TRI);		Register DispatchPtrReg = Info.addDispatchPtr(TRI);
MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);		MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchPtrReg);		CCInfo.AllocateReg(DispatchPtrReg);
}		}

if (Info.hasQueuePtr()) {		if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) {
Register QueuePtrReg = Info.addQueuePtr(TRI);		Register QueuePtrReg = Info.addQueuePtr(TRI);
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);		MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);		CCInfo.AllocateReg(QueuePtrReg);
}		}

if (Info.hasKernargSegmentPtr()) {		if (Info.hasKernargSegmentPtr()) {
MachineRegisterInfo &MRI = MF.getRegInfo();		MachineRegisterInfo &MRI = MF.getRegInfo();
Register InputPtrReg = Info.addKernargSegmentPtr(TRI);		Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
▲ Show 20 Lines • Show All 10,437 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

	Show First 20 Lines • Show All 54 Lines • ▼ Show 20 Lines
	bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);			bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);

	/// \returns The offset of the hostcall pointer argument from implicitarg_ptr			/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
	unsigned getHostcallImplicitArgPosition();			unsigned getHostcallImplicitArgPosition();

	/// \returns The offset of the heap ptr argument from implicitarg_ptr			/// \returns The offset of the heap ptr argument from implicitarg_ptr
	unsigned getHeapPtrImplicitArgPosition();			unsigned getHeapPtrImplicitArgPosition();

				/// \returns The offset of the queue ptr argument from implicitarg_ptr
				unsigned getQueuePtrImplicitArgPosition();

	/// \returns Code object version.			/// \returns Code object version.
	unsigned getAmdhsaCodeObjectVersion();			unsigned getAmdhsaCodeObjectVersion();

	struct GcnBufferFormatInfo {			struct GcnBufferFormatInfo {
	unsigned Format;			unsigned Format;
	unsigned BitsPerComp;			unsigned BitsPerComp;
	unsigned NumComponents;			unsigned NumComponents;
	unsigned NumFormat;			unsigned NumFormat;
	▲ Show 20 Lines • Show All 982 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

	Show First 20 Lines • Show All 157 Lines • ▼ Show 20 Lines

	unsigned getHeapPtrImplicitArgPosition() {			unsigned getHeapPtrImplicitArgPosition() {
	if (AmdhsaCodeObjectVersion == 5)			if (AmdhsaCodeObjectVersion == 5)
	return 96;			return 96;
	llvm_unreachable("hidden_heap is supported only by code object version 5");			llvm_unreachable("hidden_heap is supported only by code object version 5");
	return 0;			return 0;
	}			}

				// Returns the offset of the queue ptr argument from implicitarg_ptr, which
				// is 200 under code object version 5. Any other version reaches
				// llvm_unreachable.
				unsigned getQueuePtrImplicitArgPosition() {
				if (AmdhsaCodeObjectVersion == 5)
				return 200;
				llvm_unreachable("queue_ptr is supported only by code object version 5");
				// Placed after llvm_unreachable, mirroring getHeapPtrImplicitArgPosition().
				return 0;
				}
				arsenmUnsubmitted Not Done Reply Inline Actions This isn't a scalable solution for all of the inputs. Should have an enum with offsets or something arsenm: This isn't a scalable solution for all of the inputs. Should have an enum with offsets or…
				cfangAuthorUnsubmitted Done Reply Inline Actions Will do this in a following patch because the enum of the implicit kernel arguments will also be used there. Still thinking of the appropriate definition. cfang: Will do this in a following patch because the enum of the implicit kernel arguments will also…

	#define GET_MIMGBaseOpcodesTable_IMPL			#define GET_MIMGBaseOpcodesTable_IMPL
	#define GET_MIMGDimInfoTable_IMPL			#define GET_MIMGDimInfoTable_IMPL
	#define GET_MIMGInfoTable_IMPL			#define GET_MIMGInfoTable_IMPL
	#define GET_MIMGLZMappingTable_IMPL			#define GET_MIMGLZMappingTable_IMPL
	#define GET_MIMGMIPMappingTable_IMPL			#define GET_MIMGMIPMappingTable_IMPL
	#define GET_MIMGBiasMappingTable_IMPL			#define GET_MIMGBiasMappingTable_IMPL
	#define GET_MIMGOffsetMappingTable_IMPL			#define GET_MIMGOffsetMappingTable_IMPL
	#define GET_MIMGG16MappingTable_IMPL			#define GET_MIMGG16MappingTable_IMPL
	▲ Show 20 Lines • Show All 1,946 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll

	Show All 20 Lines
	; CI-NEXT: s_waitcnt lgkmcnt(0)			; CI-NEXT: s_waitcnt lgkmcnt(0)
	; CI-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1			; CI-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1
	; CI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc			; CI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
	; CI-NEXT: flat_store_dword v[0:1], v0			; CI-NEXT: flat_store_dword v[0:1], v0
	; CI-NEXT: s_endpgm			; CI-NEXT: s_endpgm
	;			;
	; GFX9-LABEL: is_private_vgpr:			; GFX9-LABEL: is_private_vgpr:
	; GFX9: ; %bb.0:			; GFX9: ; %bb.0:
	; GFX9-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0			; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0
	; GFX9-NEXT: s_waitcnt lgkmcnt(0)			; GFX9-NEXT: s_waitcnt lgkmcnt(0)
	; GFX9-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc			; GFX9-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc
	; GFX9-NEXT: s_waitcnt vmcnt(0)			; GFX9-NEXT: s_waitcnt vmcnt(0)
	; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)			; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
	; GFX9-NEXT: s_lshl_b32 s0, s0, 16			; GFX9-NEXT: s_lshl_b32 s0, s0, 16
	; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1			; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1
	; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc			; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
	; GFX9-NEXT: global_store_dword v[0:1], v0, off			; GFX9-NEXT: global_store_dword v[0:1], v0, off
	; GFX9-NEXT: s_endpgm			; GFX9-NEXT: s_endpgm
	;			;
	; GFX10-LABEL: is_private_vgpr:			; GFX10-LABEL: is_private_vgpr:
	; GFX10: ; %bb.0:			; GFX10: ; %bb.0:
	; GFX10-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0			; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
	; GFX10-NEXT: s_waitcnt lgkmcnt(0)			; GFX10-NEXT: s_waitcnt lgkmcnt(0)
	; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc			; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: s_waitcnt_depctr 0xffe3			; GFX10-NEXT: s_waitcnt_depctr 0xffe3
	; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)			; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
	; GFX10-NEXT: s_lshl_b32 s0, s0, 16			; GFX10-NEXT: s_lshl_b32 s0, s0, 16
	; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s0, v1			; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s0, v1
	Show All 22 Lines
	; CI-NEXT: v_mov_b32_e32 v0, 0			; CI-NEXT: v_mov_b32_e32 v0, 0
	; CI-NEXT: flat_store_dword v[0:1], v0			; CI-NEXT: flat_store_dword v[0:1], v0
	; CI-NEXT: s_waitcnt vmcnt(0)			; CI-NEXT: s_waitcnt vmcnt(0)
	; CI-NEXT: .LBB1_2: ; %bb1			; CI-NEXT: .LBB1_2: ; %bb1
	; CI-NEXT: s_endpgm			; CI-NEXT: s_endpgm
	;			;
	; GFX9-LABEL: is_private_sgpr:			; GFX9-LABEL: is_private_sgpr:
	; GFX9: ; %bb.0:			; GFX9: ; %bb.0:
	; GFX9-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; GFX9-NEXT: s_waitcnt lgkmcnt(0)			; GFX9-NEXT: s_waitcnt lgkmcnt(0)
	; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)			; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
	; GFX9-NEXT: s_lshl_b32 s0, s0, 16			; GFX9-NEXT: s_lshl_b32 s0, s0, 16
	; GFX9-NEXT: s_cmp_lg_u32 s1, s0			; GFX9-NEXT: s_cmp_lg_u32 s1, s0
	; GFX9-NEXT: s_cbranch_scc1 .LBB1_2			; GFX9-NEXT: s_cbranch_scc1 .LBB1_2
	; GFX9-NEXT: ; %bb.1: ; %bb0			; GFX9-NEXT: ; %bb.1: ; %bb0
	; GFX9-NEXT: v_mov_b32_e32 v0, 0			; GFX9-NEXT: v_mov_b32_e32 v0, 0
	; GFX9-NEXT: global_store_dword v[0:1], v0, off			; GFX9-NEXT: global_store_dword v[0:1], v0, off
	; GFX9-NEXT: s_waitcnt vmcnt(0)			; GFX9-NEXT: s_waitcnt vmcnt(0)
	; GFX9-NEXT: .LBB1_2: ; %bb1			; GFX9-NEXT: .LBB1_2: ; %bb1
	; GFX9-NEXT: s_endpgm			; GFX9-NEXT: s_endpgm
	;			;
	; GFX10-LABEL: is_private_sgpr:			; GFX10-LABEL: is_private_sgpr:
	; GFX10: ; %bb.0:			; GFX10: ; %bb.0:
	; GFX10-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; GFX10-NEXT: s_waitcnt lgkmcnt(0)			; GFX10-NEXT: s_waitcnt lgkmcnt(0)
	; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)			; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
	; GFX10-NEXT: s_lshl_b32 s0, s0, 16			; GFX10-NEXT: s_lshl_b32 s0, s0, 16
	; GFX10-NEXT: s_cmp_lg_u32 s1, s0			; GFX10-NEXT: s_cmp_lg_u32 s1, s0
	; GFX10-NEXT: s_cbranch_scc1 .LBB1_2			; GFX10-NEXT: s_cbranch_scc1 .LBB1_2
	; GFX10-NEXT: ; %bb.1: ; %bb0			; GFX10-NEXT: ; %bb.1: ; %bb0
	; GFX10-NEXT: v_mov_b32_e32 v0, 0			; GFX10-NEXT: v_mov_b32_e32 v0, 0
	; GFX10-NEXT: global_store_dword v[0:1], v0, off			; GFX10-NEXT: global_store_dword v[0:1], v0, off
	Show All 18 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll

	Show All 20 Lines
	; CI-NEXT: s_waitcnt lgkmcnt(0)			; CI-NEXT: s_waitcnt lgkmcnt(0)
	; CI-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1			; CI-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1
	; CI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc			; CI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
	; CI-NEXT: flat_store_dword v[0:1], v0			; CI-NEXT: flat_store_dword v[0:1], v0
	; CI-NEXT: s_endpgm			; CI-NEXT: s_endpgm
	;			;
	; GFX9-LABEL: is_local_vgpr:			; GFX9-LABEL: is_local_vgpr:
	; GFX9: ; %bb.0:			; GFX9: ; %bb.0:
	; GFX9-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0			; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0
	; GFX9-NEXT: s_waitcnt lgkmcnt(0)			; GFX9-NEXT: s_waitcnt lgkmcnt(0)
	; GFX9-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc			; GFX9-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc
	; GFX9-NEXT: s_waitcnt vmcnt(0)			; GFX9-NEXT: s_waitcnt vmcnt(0)
	; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)			; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
	; GFX9-NEXT: s_lshl_b32 s0, s0, 16			; GFX9-NEXT: s_lshl_b32 s0, s0, 16
	; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1			; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s0, v1
	; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc			; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
	; GFX9-NEXT: global_store_dword v[0:1], v0, off			; GFX9-NEXT: global_store_dword v[0:1], v0, off
	; GFX9-NEXT: s_endpgm			; GFX9-NEXT: s_endpgm
	;			;
	; GFX10-LABEL: is_local_vgpr:			; GFX10-LABEL: is_local_vgpr:
	; GFX10: ; %bb.0:			; GFX10: ; %bb.0:
	; GFX10-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0			; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
	; GFX10-NEXT: s_waitcnt lgkmcnt(0)			; GFX10-NEXT: s_waitcnt lgkmcnt(0)
	; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc			; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
	; GFX10-NEXT: s_waitcnt vmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0)
	; GFX10-NEXT: s_waitcnt_depctr 0xffe3			; GFX10-NEXT: s_waitcnt_depctr 0xffe3
	; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)			; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
	; GFX10-NEXT: s_lshl_b32 s0, s0, 16			; GFX10-NEXT: s_lshl_b32 s0, s0, 16
	; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s0, v1			; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s0, v1
	Show All 22 Lines
	; CI-NEXT: v_mov_b32_e32 v0, 0			; CI-NEXT: v_mov_b32_e32 v0, 0
	; CI-NEXT: flat_store_dword v[0:1], v0			; CI-NEXT: flat_store_dword v[0:1], v0
	; CI-NEXT: s_waitcnt vmcnt(0)			; CI-NEXT: s_waitcnt vmcnt(0)
	; CI-NEXT: .LBB1_2: ; %bb1			; CI-NEXT: .LBB1_2: ; %bb1
	; CI-NEXT: s_endpgm			; CI-NEXT: s_endpgm
	;			;
	; GFX9-LABEL: is_local_sgpr:			; GFX9-LABEL: is_local_sgpr:
	; GFX9: ; %bb.0:			; GFX9: ; %bb.0:
	; GFX9-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; GFX9-NEXT: s_waitcnt lgkmcnt(0)			; GFX9-NEXT: s_waitcnt lgkmcnt(0)
	; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)			; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
	; GFX9-NEXT: s_lshl_b32 s0, s0, 16			; GFX9-NEXT: s_lshl_b32 s0, s0, 16
	; GFX9-NEXT: s_cmp_lg_u32 s1, s0			; GFX9-NEXT: s_cmp_lg_u32 s1, s0
	; GFX9-NEXT: s_cbranch_scc1 .LBB1_2			; GFX9-NEXT: s_cbranch_scc1 .LBB1_2
	; GFX9-NEXT: ; %bb.1: ; %bb0			; GFX9-NEXT: ; %bb.1: ; %bb0
	; GFX9-NEXT: v_mov_b32_e32 v0, 0			; GFX9-NEXT: v_mov_b32_e32 v0, 0
	; GFX9-NEXT: global_store_dword v[0:1], v0, off			; GFX9-NEXT: global_store_dword v[0:1], v0, off
	; GFX9-NEXT: s_waitcnt vmcnt(0)			; GFX9-NEXT: s_waitcnt vmcnt(0)
	; GFX9-NEXT: .LBB1_2: ; %bb1			; GFX9-NEXT: .LBB1_2: ; %bb1
	; GFX9-NEXT: s_endpgm			; GFX9-NEXT: s_endpgm
	;			;
	; GFX10-LABEL: is_local_sgpr:			; GFX10-LABEL: is_local_sgpr:
	; GFX10: ; %bb.0:			; GFX10: ; %bb.0:
	; GFX10-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; GFX10-NEXT: s_waitcnt lgkmcnt(0)			; GFX10-NEXT: s_waitcnt lgkmcnt(0)
	; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)			; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
	; GFX10-NEXT: s_lshl_b32 s0, s0, 16			; GFX10-NEXT: s_lshl_b32 s0, s0, 16
	; GFX10-NEXT: s_cmp_lg_u32 s1, s0			; GFX10-NEXT: s_cmp_lg_u32 s1, s0
	; GFX10-NEXT: s_cbranch_scc1 .LBB1_2			; GFX10-NEXT: s_cbranch_scc1 .LBB1_2
	; GFX10-NEXT: ; %bb.1: ; %bb0			; GFX10-NEXT: ; %bb.1: ; %bb0
	; GFX10-NEXT: v_mov_b32_e32 v0, 0			; GFX10-NEXT: v_mov_b32_e32 v0, 0
	; GFX10-NEXT: global_store_dword v[0:1], v0, off			; GFX10-NEXT: global_store_dword v[0:1], v0, off
	Show All 18 Lines

llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll

	Show First 20 Lines • Show All 87 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: .size: 8			; CHECK-NEXT: .size: 8
	; CHECK-NEXT: .value_kind: hidden_completion_action			; CHECK-NEXT: .value_kind: hidden_completion_action
	; GFX8-NEXT: - .offset: 216			; GFX8-NEXT: - .offset: 216
	; GFX8-NEXT: .size: 4			; GFX8-NEXT: .size: 4
	; GFX8-NEXT: .value_kind: hidden_private_base			; GFX8-NEXT: .value_kind: hidden_private_base
	; GFX8-NEXT: - .offset: 220			; GFX8-NEXT: - .offset: 220
	; GFX8-NEXT: .size: 4			; GFX8-NEXT: .size: 4
	; GFX8-NEXT: .value_kind: hidden_shared_base			; GFX8-NEXT: .value_kind: hidden_shared_base
	; CHECK-NEXT: - .address_space: global			; CHECK: - .address_space: global
	; CHECK-NEXT: .offset: 224			; CHECK-NEXT: .offset: 224
	; CHECK-NEXT: .size: 8			; CHECK-NEXT: .size: 8
	; CHECK-NEXT: .value_kind: hidden_queue_ptr			; CHECK-NEXT: .value_kind: hidden_queue_ptr

	; CHECK: .name: test_v5			; CHECK: .name: test_v5
	; CHECK: .symbol: test_v5.kd			; CHECK: .symbol: test_v5.kd

	; CHECK: amdhsa.version:			; CHECK: amdhsa.version:
	Show All 21 Lines

llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll

Show All 18 Lines	define amdgpu_kernel void @addrspacecast_requires_queue_ptr(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) {
%flat.private = addrspacecast i32 addrspace(5)* %ptr.private to i32*		%flat.private = addrspacecast i32 addrspace(5)* %ptr.private to i32*
%flat.local = addrspacecast i32 addrspace(3)* %ptr.local to i32*		%flat.local = addrspacecast i32 addrspace(3)* %ptr.local to i32*
store volatile i32 1, i32* %flat.private		store volatile i32 1, i32* %flat.private
store volatile i32 2, i32* %flat.local		store volatile i32 2, i32* %flat.local
ret void		ret void
}		}

; CHECK: - .args:		; CHECK: - .args:
; CHECK: .offset: 208		; CHECK: .value_kind: hidden_multigrid_sync_arg
; CHECK-NEXT: .size: 8		; PRE-GFX9: .offset: 200
; CHECK-NEXT: .value_kind: hidden_queue_ptr		; PRE-GFX9-NEXT: .size: 4
		; PRE-GFX9-NEXT: .value_kind: hidden_private_base
		; PRE-GFX9-NEXT: .offset: 204
		; PRE-GFX9-NEXT: .size: 4
		; PRE-GFX9-NEXT: .value_kind: hidden_shared_base
		; GFX9-NOT: .value_kind: hidden_multigrid_sync_arg
		; GFX9-NOT: .value_kind: hidden_private_base
		; CHECK-NOT: .value_kind: hidden_queue_ptr
; CHECK: .name: is_shared_requires_queue_ptr		; CHECK: .name: is_shared_requires_queue_ptr
; CHECK: .symbol: is_shared_requires_queue_ptr.kd		; CHECK: .symbol: is_shared_requires_queue_ptr.kd
define amdgpu_kernel void @is_shared_requires_queue_ptr(i8* %ptr) {		define amdgpu_kernel void @is_shared_requires_queue_ptr(i8* %ptr) {
%is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)		%is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
%zext = zext i1 %is.shared to i32		%zext = zext i1 %is.shared to i32
store volatile i32 %zext, i32 addrspace(1)* undef		store volatile i32 %zext, i32 addrspace(1)* undef
ret void		ret void
}		}

; CHECK: - .args:		; CHECK: - .args:
; CHECK: .offset: 208		; CHECK: .value_kind: hidden_multigrid_sync_arg
; CHECK-NEXT: .size: 8		; PRE-GFX9: .offset: 200
; CHECK-NEXT: .value_kind: hidden_queue_ptr		; PRE-GFX9-NEXT: .size: 4
		; PRE-GFX9-NEXT: .value_kind: hidden_private_base
		; PRE-GFX9-NEXT: .offset: 204
		; PRE-GFX9-NEXT: .size: 4
		; PRE-GFX9-NEXT: .value_kind: hidden_shared_base
		; GFX9-NOT: .value_kind: hidden_private_base
		; GFX9-NOT: .value_kind: hidden_shared_base
		; CHECK-NOT: .value_kind: hidden_queue_ptr
; CHECK: .name: is_private_requires_queue_ptr		; CHECK: .name: is_private_requires_queue_ptr
; CHECK: .symbol: is_private_requires_queue_ptr.kd		; CHECK: .symbol: is_private_requires_queue_ptr.kd
define amdgpu_kernel void @is_private_requires_queue_ptr(i8* %ptr) {		define amdgpu_kernel void @is_private_requires_queue_ptr(i8* %ptr) {
%is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)		%is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
%zext = zext i1 %is.private to i32		%zext = zext i1 %is.private to i32
store volatile i32 %zext, i32 addrspace(1)* undef		store volatile i32 %zext, i32 addrspace(1)* undef
ret void		ret void
}		}

; CHECK: - .args:		; CHECK: - .args:
; CHECK: .offset: 200		; CHECK: .value_kind: hidden_multigrid_sync_arg
; CHECK-NEXT: .size: 8		; PRE-GFX9: .offset: 192
; CHECK-NEXT: .value_kind: hidden_queue_ptr		; PRE-GFX9-NEXT: .size: 4
		; PRE-GFX9-NEXT: .value_kind: hidden_private_base
		; PRE-GFX9-NEXT: .offset: 196
		; PRE-GFX9-NEXT: .size: 4
		; PRE-GFX9-NEXT: .value_kind: hidden_shared_base
		; PRE-GFX9-NEXT: .address_space: global
		; PRE-GFX9-NEXT: .offset: 200
		; PRE-GFX9-NEXT: .size: 8
		; PRE-GFX9-NEXT: .value_kind: hidden_queue_ptr
		; GFX9-NOT: .value_kind: hidden_private_base
		; GFX9-NOT: .value_kind: hidden_shared_base
		; GFX9-NOT: .value_kind: hidden_queue_ptr
; CHECK: .name: trap_requires_queue_ptr		; CHECK: .name: trap_requires_queue_ptr
; CHECK: .symbol: trap_requires_queue_ptr.kd		; CHECK: .symbol: trap_requires_queue_ptr.kd
define amdgpu_kernel void @trap_requires_queue_ptr() {		define amdgpu_kernel void @trap_requires_queue_ptr() {
call void @llvm.trap()		call void @llvm.trap()
unreachable		unreachable
}		}

; CHECK: - .args:		; CHECK: - .args:
; CHECK: .offset: 200
; CHECK-NEXT: .size: 8
; CHECK-NEXT: .value_kind: hidden_queue_ptr
; CHECK: .name: debugtrap_requires_queue_ptr
; CHECK: .symbol: debugtrap_requires_queue_ptr.kd
define amdgpu_kernel void @debugtrap_requires_queue_ptr() {
call void @llvm.debugtrap()
unreachable
}

; CHECK: - .args:
; CHECK: .offset: 208		; CHECK: .offset: 208
; CHECK-NEXT: .size: 8		; CHECK-NEXT: .size: 8
; CHECK-NEXT: .value_kind: hidden_queue_ptr		; CHECK-NEXT: .value_kind: hidden_queue_ptr
; CHECK: .name: amdgcn_queue_ptr_requires_queue_ptr		; CHECK: .name: amdgcn_queue_ptr_requires_queue_ptr
; CHECK: .symbol: amdgcn_queue_ptr_requires_queue_ptr.kd		; CHECK: .symbol: amdgcn_queue_ptr_requires_queue_ptr.kd
define amdgpu_kernel void @amdgcn_queue_ptr_requires_queue_ptr(i64 addrspace(1)* %ptr) {		define amdgpu_kernel void @amdgcn_queue_ptr_requires_queue_ptr(i64 addrspace(1)* %ptr) {
%queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()		%queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()		%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
Show All 18 Lines

llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll

This file was added.

				; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 -filetype=obj -o - < %s \| llvm-readelf --notes - \| FileCheck %s
				; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s \| FileCheck --check-prefix=CHECK %s

				declare void @function1()

				declare void @function2() #0

				; Function Attrs: noinline
				define void @function3(i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink) #2 {
				store i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink, align 8
				ret void
				}

				; Function Attrs: noinline
				define void @function4(i64 %arg, i64* %a) #2 {
				store i64 %arg, i64* %a
				ret void
				}

				; Function Attrs: noinline
				define void @function5(i8 addrspace(4)* %ptr, i64* %sink) #2 {
				%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 168
				%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
				%x = load i64, i64 addrspace(4)* %cast
				store i64 %x, i64* %sink
				ret void
				}

				; Function Attrs: nounwind readnone speculatable willreturn
				declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1

				; CHECK: amdhsa.kernels:
				; CHECK: - .args:
				; CHECK-NOT: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel10
				define amdgpu_kernel void @test_kernel10(i8* %a) {
				store i8 3, i8* %a, align 1
				ret void
				}

				; Call to an extern function

				; CHECK: - .args:
				; CHECK: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel20
				define amdgpu_kernel void @test_kernel20(i8* %a) {
				call void @function1()
				store i8 3, i8* %a, align 1
				ret void
				}

				; Explicit attribute on kernel

				; CHECK: - .args:
				; CHECK-NOT: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel21
				define amdgpu_kernel void @test_kernel21(i8* %a) #0 {
				call void @function1()
				store i8 3, i8* %a, align 1
				ret void
				}

				; Explicit attribute on extern callee

				; CHECK: - .args:
				; CHECK-NOT: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel22
				define amdgpu_kernel void @test_kernel22(i8* %a) {
				call void @function2()
				store i8 3, i8* %a, align 1
				ret void
				}

				; Access more bytes than the pointer size

				; CHECK: - .args:
				; CHECK: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel30
				define amdgpu_kernel void @test_kernel30(i128* %a) {
				%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
				%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 192
				%cast = bitcast i8 addrspace(4)* %gep to i128 addrspace(4)*
				%x = load i128, i128 addrspace(4)* %cast
				store i128 %x, i128* %a
				ret void
				}

				; Typical load of queue pointer

				; CHECK: - .args:
				; CHECK: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel40
				define amdgpu_kernel void @test_kernel40(i64* %a) {
				%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
				%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 200
				%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
				%x = load i64, i64 addrspace(4)* %cast
				store i64 %x, i64* %a
				ret void
				}

				; Typical usage, overridden by explicit attribute on kernel

				; CHECK: - .args:
				; CHECK-NOT: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel41
				define amdgpu_kernel void @test_kernel41(i64* %a) #0 {
				%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
				%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 200
				%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
				%x = load i64, i64 addrspace(4)* %cast
				store i64 %x, i64* %a
				ret void
				}

				; Access to implicit arg before the queue pointer

				; CHECK: - .args:
				; CHECK-NOT: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel42
				define amdgpu_kernel void @test_kernel42(i64* %a) {
				%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
				%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 192
				%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
				%x = load i64, i64 addrspace(4)* %cast
				store i64 %x, i64* %a
				ret void
				}

				; Access to implicit arg after the queue pointer

				; CHECK: - .args:
				; CHECK-NOT: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel43
				define amdgpu_kernel void @test_kernel43(i64* %a) {
				%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
				%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 208
				%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
				%x = load i64, i64 addrspace(4)* %cast
				store i64 %x, i64* %a
				ret void
				}

				; Accessing a byte just before the queue pointer

				; CHECK: - .args:
				; CHECK-NOT: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel44
				define amdgpu_kernel void @test_kernel44(i8* %a) {
				%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
				%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 199
				%x = load i8, i8 addrspace(4)* %gep, align 1
				store i8 %x, i8* %a, align 1
				ret void
				}

				; Accessing a byte inside the queue pointer

				; CHECK: - .args:
				; CHECK: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel45
				define amdgpu_kernel void @test_kernel45(i8* %a) {
				%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
				%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 200
				%x = load i8, i8 addrspace(4)* %gep, align 1
				store i8 %x, i8* %a, align 1
				ret void
				}

				; Accessing a byte inside the queue pointer

				; CHECK: - .args:
				; CHECK: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel46
				define amdgpu_kernel void @test_kernel46(i8* %a) {
				%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
				%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 207
				%x = load i8, i8 addrspace(4)* %gep, align 1
				store i8 %x, i8* %a, align 1
				ret void
				}

				; Accessing a byte just after the queue pointer

				; CHECK: - .args:
				; CHECK-NOT: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel47
				define amdgpu_kernel void @test_kernel47(i8* %a) {
				%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
				%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 208
				%x = load i8, i8 addrspace(4)* %gep, align 1
				store i8 %x, i8* %a, align 1
				ret void
				}

				; Access with an unknown offset

				; CHECK: - .args:
				; CHECK: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel50
				define amdgpu_kernel void @test_kernel50(i8* %a, i32 %b) {
				%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
				%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 %b
				%x = load i8, i8 addrspace(4)* %gep, align 1
				store i8 %x, i8* %a, align 1
				ret void
				}

				; Multiple geps reaching the queue pointer argument.

				; CHECK: - .args:
				; CHECK: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel51
				define amdgpu_kernel void @test_kernel51(i8* %a) {
				%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
				%gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
				%gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 184
				%x = load i8, i8 addrspace(4)* %gep2, align 1
				store i8 %x, i8* %a, align 1
				ret void
				}

				; Multiple geps not reaching the queue pointer argument.

				; CHECK: - .args:
				; CHECK-NOT: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel52
				define amdgpu_kernel void @test_kernel52(i8* %a) {
				%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
				%gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
				%gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 16
				%x = load i8, i8 addrspace(4)* %gep2, align 1
				store i8 %x, i8* %a, align 1
				ret void
				}

				; Queue pointer used inside a function call

				; CHECK: - .args:
				; CHECK: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel60
				define amdgpu_kernel void @test_kernel60(i64* %a) #2 {
				%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
				%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 200
				%cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
				%x = load i64, i64 addrspace(4)* %cast
				call void @function4(i64 %x, i64* %a)
				ret void
				}

				; Queue pointer retrieved inside a function call; chain of geps

				; CHECK: - .args:
				; CHECK: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel61
				define amdgpu_kernel void @test_kernel61(i64* %a) #2 {
				%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
				%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 32
				call void @function5(i8 addrspace(4)* %gep, i64* %a)
				ret void
				}

				; Pointer captured

				; CHECK: - .args:
				; CHECK: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel70
				define amdgpu_kernel void @test_kernel70(i8 addrspace(4)* addrspace(1)* %sink) #2 {
				%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
				%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
				store i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* %sink, align 8
				ret void
				}

				; Pointer captured inside function call

				; CHECK: - .args:
				; CHECK: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel71
				define amdgpu_kernel void @test_kernel71(i8 addrspace(4)* addrspace(1)* %sink) #2 {
				%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
				%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
				call void @function3(i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* %sink)
				ret void
				}

				; Ineffective pointer capture

				; CHECK: - .args:
				; CHECK-NOT: hidden_queue_ptr
				; CHECK-LABEL: .name: test_kernel72
				define amdgpu_kernel void @test_kernel72() #2 {
				%ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
				%gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
				store i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* undef, align 8
				ret void
				}

				attributes #0 = { "amdgpu-no-queue-ptr" }
				attributes #1 = { nounwind readnone speculatable willreturn }
				attributes #2 = { noinline }

llvm/test/CodeGen/AMDGPU/kernarg-size.ll

	; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s \| FileCheck --check-prefix=HSA %s			; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s \| FileCheck --check-prefix=DOORBELL %s
	; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s \| FileCheck --check-prefix=HSA %s			; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s \| FileCheck --check-prefix=DOORBELL %s
	; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s \| FileCheck --check-prefix=HSA %s			; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s \| FileCheck --check-prefix=HSA %s

	declare void @llvm.trap() #0			declare void @llvm.trap() #0
	declare void @llvm.debugtrap() #1

	; HSA: .amdhsa_kernel trap			; HSA: .amdhsa_kernel trap
	; HSA-NEXT: .amdhsa_group_segment_fixed_size 0			; HSA-NEXT: .amdhsa_group_segment_fixed_size 0
	; HSA-NEXT: .amdhsa_private_segment_fixed_size 0			; HSA-NEXT: .amdhsa_private_segment_fixed_size 0
	; HSA-NEXT: .amdhsa_kernarg_size 8			; HSA-NEXT: .amdhsa_kernarg_size 8
	; HSA-NEXT: .amdhsa_user_sgpr_count 8			; HSA-NEXT: .amdhsa_user_sgpr_count 8
	; HSA-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1			; HSA-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
	; HSA: .end_amdhsa_kernel			; HSA: .end_amdhsa_kernel

				; DOORBELL: .amdhsa_kernel trap
				; DOORBELL-NEXT: .amdhsa_group_segment_fixed_size 0
				; DOORBELL-NEXT: .amdhsa_private_segment_fixed_size 0
				; DOORBELL-NEXT: .amdhsa_kernarg_size 8
				; DOORBELL-NEXT: .amdhsa_user_sgpr_count 6
				; DOORBELL-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
				; DOORBELL: .end_amdhsa_kernel

	define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) {			define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) {
	store volatile i32 1, i32 addrspace(1)* %arg0			store volatile i32 1, i32 addrspace(1)* %arg0
	call void @llvm.trap()			call void @llvm.trap()
	unreachable			unreachable
	store volatile i32 2, i32 addrspace(1)* %arg0			store volatile i32 2, i32 addrspace(1)* %arg0
	ret void			ret void
	}			}

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll

	Show All 20 Lines
	; FIXME: setcc (zero_extend (setcc)), 1) not folded out, resulting in			; FIXME: setcc (zero_extend (setcc)), 1) not folded out, resulting in
	; select and vcc branch.			; select and vcc branch.

	; GCN-LABEL: {{^}}is_private_sgpr:			; GCN-LABEL: {{^}}is_private_sgpr:
	; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}			; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
	; GFX9-DAG: s_getreg_b32 [[APERTURE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16)			; GFX9-DAG: s_getreg_b32 [[APERTURE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16)

	; CI-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[6:7], 0x1{{$}}			; CI-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[6:7], 0x1{{$}}
	; GFX9-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[6:7], 0x4{{$}}			; GFX9-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[4:5], 0x4{{$}}
	; GFX9: s_lshl_b32 [[APERTURE]], [[APERTURE]], 16			; GFX9: s_lshl_b32 [[APERTURE]], [[APERTURE]], 16

	; GCN: s_cmp_eq_u32 [[PTR_HI]], [[APERTURE]]			; GCN: s_cmp_eq_u32 [[PTR_HI]], [[APERTURE]]
	; GCN: s_cbranch_vccnz			; GCN: s_cbranch_vccnz
	define amdgpu_kernel void @is_private_sgpr(i8* %ptr) {			define amdgpu_kernel void @is_private_sgpr(i8* %ptr) {
	%val = call i1 @llvm.amdgcn.is.private(i8* %ptr)			%val = call i1 @llvm.amdgcn.is.private(i8* %ptr)
	br i1 %val, label %bb0, label %bb1			br i1 %val, label %bb0, label %bb1

	Show All 12 Lines

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll

	Show All 22 Lines
	; select and vcc branch.			; select and vcc branch.

	; GCN-LABEL: {{^}}is_local_sgpr:			; GCN-LABEL: {{^}}is_local_sgpr:
	; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}			; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
	; GFX9-DAG: s_getreg_b32 [[APERTURE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16)			; GFX9-DAG: s_getreg_b32 [[APERTURE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16)
	; GFX9-DAG: s_lshl_b32 [[APERTURE]], [[APERTURE]], 16			; GFX9-DAG: s_lshl_b32 [[APERTURE]], [[APERTURE]], 16

	; CI-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[6:7], 0x1{{$}}			; CI-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[6:7], 0x1{{$}}
	; GFX9-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[6:7], 0x4{{$}}			; GFX9-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[4:5], 0x4{{$}}

	; GCN: s_cmp_eq_u32 [[PTR_HI]], [[APERTURE]]			; GCN: s_cmp_eq_u32 [[PTR_HI]], [[APERTURE]]
	; GCN: s_cbranch_vccnz			; GCN: s_cbranch_vccnz
	define amdgpu_kernel void @is_local_sgpr(i8* %ptr) {			define amdgpu_kernel void @is_local_sgpr(i8* %ptr) {
	%val = call i1 @llvm.amdgcn.is.shared(i8* %ptr)			%val = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
	br i1 %val, label %bb0, label %bb1			br i1 %val, label %bb0, label %bb1

	bb0:			bb0:
	Show All 11 Lines

llvm/test/CodeGen/AMDGPU/trap-abis.ll

	Show All 32 Lines
	; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1			; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1
	; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)			; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
	; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]			; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
	; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)			; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
	; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm			; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm
	;			;
	; NOHSA-TRAP-GFX900-V4-LABEL: trap:			; NOHSA-TRAP-GFX900-V4-LABEL: trap:
	; NOHSA-TRAP-GFX900-V4: ; %bb.0:			; NOHSA-TRAP-GFX900-V4: ; %bb.0:
	; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24			; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
	; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0			; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
	; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1			; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1
	; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)			; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
	; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]			; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
	; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)			; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
	; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm			; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm
	;			;
	; HSA-TRAP-GFX803-V2-LABEL: trap:			; HSA-TRAP-GFX803-V2-LABEL: trap:
	▲ Show 20 Lines • Show All 187 Lines • ▼ Show 20 Lines
	; HSA-TRAP-GFX900-V3-NEXT: s_mov_b64 s[0:1], s[4:5]			; HSA-TRAP-GFX900-V3-NEXT: s_mov_b64 s[0:1], s[4:5]
	; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)			; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
	; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[2:3]			; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[2:3]
	; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)			; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
	; HSA-TRAP-GFX900-V3-NEXT: s_trap 2			; HSA-TRAP-GFX900-V3-NEXT: s_trap 2
	;			;
	; HSA-TRAP-GFX900-V4-LABEL: trap:			; HSA-TRAP-GFX900-V4-LABEL: trap:
	; HSA-TRAP-GFX900-V4: ; %bb.0:			; HSA-TRAP-GFX900-V4: ; %bb.0:
	; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0			; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
	; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1			; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1
	; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)			; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
	; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]			; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
	; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)			; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
	; HSA-TRAP-GFX900-V4-NEXT: s_trap 2			; HSA-TRAP-GFX900-V4-NEXT: s_trap 2
	;			;
	; HSA-NOTRAP-GFX900-V2-LABEL: trap:			; HSA-NOTRAP-GFX900-V2-LABEL: trap:
	▲ Show 20 Lines • Show All 81 Lines • ▼ Show 20 Lines
	; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1			; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1
	; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)			; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
	; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]			; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
	; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)			; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
	; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm			; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm
	;			;
	; HSA-NOTRAP-GFX900-V4-LABEL: trap:			; HSA-NOTRAP-GFX900-V4-LABEL: trap:
	; HSA-NOTRAP-GFX900-V4: ; %bb.0:			; HSA-NOTRAP-GFX900-V4: ; %bb.0:
	; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0			; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
	; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1			; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1
	; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)			; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
	; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]			; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
	; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)			; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
	; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm			; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm
	store volatile i32 1, i32 addrspace(1)* %arg0			store volatile i32 1, i32 addrspace(1)* %arg0
	call void @llvm.trap()			call void @llvm.trap()
	Show All 34 Lines
	; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]			; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
	; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)			; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
	; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm			; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm
	; NOHSA-TRAP-GFX900-V3-NEXT: .LBB1_2: ; %trap			; NOHSA-TRAP-GFX900-V3-NEXT: .LBB1_2: ; %trap
	; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm			; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm
	;			;
	; NOHSA-TRAP-GFX900-V4-LABEL: non_entry_trap:			; NOHSA-TRAP-GFX900-V4-LABEL: non_entry_trap:
	; NOHSA-TRAP-GFX900-V4: ; %bb.0: ; %entry			; NOHSA-TRAP-GFX900-V4: ; %bb.0: ; %entry
	; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24			; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
	; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0			; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
	; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)			; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
	; NOHSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc			; NOHSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc
	; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)			; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
	; NOHSA-TRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1			; NOHSA-TRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
	; NOHSA-TRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2			; NOHSA-TRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2
	; NOHSA-TRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret			; NOHSA-TRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret
	; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3			; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3
	▲ Show 20 Lines • Show All 235 Lines • ▼ Show 20 Lines
	; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)			; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
	; HSA-TRAP-GFX900-V3-NEXT: s_endpgm			; HSA-TRAP-GFX900-V3-NEXT: s_endpgm
	; HSA-TRAP-GFX900-V3-NEXT: .LBB1_2: ; %trap			; HSA-TRAP-GFX900-V3-NEXT: .LBB1_2: ; %trap
	; HSA-TRAP-GFX900-V3-NEXT: s_mov_b64 s[0:1], s[4:5]			; HSA-TRAP-GFX900-V3-NEXT: s_mov_b64 s[0:1], s[4:5]
	; HSA-TRAP-GFX900-V3-NEXT: s_trap 2			; HSA-TRAP-GFX900-V3-NEXT: s_trap 2
	;			;
	; HSA-TRAP-GFX900-V4-LABEL: non_entry_trap:			; HSA-TRAP-GFX900-V4-LABEL: non_entry_trap:
	; HSA-TRAP-GFX900-V4: ; %bb.0: ; %entry			; HSA-TRAP-GFX900-V4: ; %bb.0: ; %entry
	; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0			; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
	; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)			; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
	; HSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc			; HSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc
	; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)			; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
	; HSA-TRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1			; HSA-TRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
	; HSA-TRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2			; HSA-TRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2
	; HSA-TRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret			; HSA-TRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret
	; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3			; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3
	▲ Show 20 Lines • Show All 102 Lines • ▼ Show 20 Lines
	; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]			; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
	; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)			; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
	; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm			; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm
	; HSA-NOTRAP-GFX900-V3-NEXT: .LBB1_2: ; %trap			; HSA-NOTRAP-GFX900-V3-NEXT: .LBB1_2: ; %trap
	; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm			; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm
	;			;
	; HSA-NOTRAP-GFX900-V4-LABEL: non_entry_trap:			; HSA-NOTRAP-GFX900-V4-LABEL: non_entry_trap:
	; HSA-NOTRAP-GFX900-V4: ; %bb.0: ; %entry			; HSA-NOTRAP-GFX900-V4: ; %bb.0: ; %entry
	; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0			; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
	; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)			; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
	; HSA-NOTRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc			; HSA-NOTRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc
	; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)			; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
	; HSA-NOTRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1			; HSA-NOTRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
	; HSA-NOTRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2			; HSA-NOTRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2
	; HSA-NOTRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret			; HSA-NOTRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret
	; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3			; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3
	Show All 14 Lines
	ret:			ret:
	store volatile i32 3, i32 addrspace(1)* %arg0			store volatile i32 3, i32 addrspace(1)* %arg0
	ret void			ret void
	}			}

	define amdgpu_kernel void @debugtrap(i32 addrspace(1)* nocapture readonly %arg0) {			define amdgpu_kernel void @debugtrap(i32 addrspace(1)* nocapture readonly %arg0) {
	; NOHSA-TRAP-GFX900-V2-LABEL: debugtrap:			; NOHSA-TRAP-GFX900-V2-LABEL: debugtrap:
	; NOHSA-TRAP-GFX900-V2: ; %bb.0:			; NOHSA-TRAP-GFX900-V2: ; %bb.0:
	; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24			; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
	; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0			; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0
	; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1			; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1
	; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v2, 2			; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v2, 2
	; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0)			; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0)
	; NOHSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1]			; NOHSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1]
	; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)			; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
	; NOHSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v2, s[0:1]			; NOHSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v2, s[0:1]
	; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)			; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
	; NOHSA-TRAP-GFX900-V2-NEXT: s_endpgm			; NOHSA-TRAP-GFX900-V2-NEXT: s_endpgm
	;			;
	; NOHSA-TRAP-GFX900-V3-LABEL: debugtrap:			; NOHSA-TRAP-GFX900-V3-LABEL: debugtrap:
	; NOHSA-TRAP-GFX900-V3: ; %bb.0:			; NOHSA-TRAP-GFX900-V3: ; %bb.0:
	; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24			; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
	; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0			; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
	; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1			; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1
	; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2			; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2
	; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)			; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
	; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]			; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
	; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)			; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
	; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1]			; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1]
	; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)			; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
	; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm			; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm
	;			;
	; NOHSA-TRAP-GFX900-V4-LABEL: debugtrap:			; NOHSA-TRAP-GFX900-V4-LABEL: debugtrap:
	; NOHSA-TRAP-GFX900-V4: ; %bb.0:			; NOHSA-TRAP-GFX900-V4: ; %bb.0:
	; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24			; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
	; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0			; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
	; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1			; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1
	; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2			; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2
	; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)			; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
	; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]			; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
	; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)			; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
	; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1]			; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1]
	; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)			; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
	Show All 16 Lines
	; HSA-TRAP-GFX803-V2-NEXT: priv = 0			; HSA-TRAP-GFX803-V2-NEXT: priv = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_dx10_clamp = 1			; HSA-TRAP-GFX803-V2-NEXT: enable_dx10_clamp = 1
	; HSA-TRAP-GFX803-V2-NEXT: debug_mode = 0			; HSA-TRAP-GFX803-V2-NEXT: debug_mode = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_ieee_mode = 1			; HSA-TRAP-GFX803-V2-NEXT: enable_ieee_mode = 1
	; HSA-TRAP-GFX803-V2-NEXT: enable_wgp_mode = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_wgp_mode = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_mem_ordered = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_mem_ordered = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_fwd_progress = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_fwd_progress = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
	; HSA-TRAP-GFX803-V2-NEXT: user_sgpr_count = 8			; HSA-TRAP-GFX803-V2-NEXT: user_sgpr_count = 6
	; HSA-TRAP-GFX803-V2-NEXT: enable_trap_handler = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_trap_handler = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_x = 1			; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_x = 1
	; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_y = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_y = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_z = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_id_z = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_info = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_workgroup_info = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_vgpr_workitem_id = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_vgpr_workitem_id = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_exception_msb = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_exception_msb = 0
	; HSA-TRAP-GFX803-V2-NEXT: granulated_lds_size = 0			; HSA-TRAP-GFX803-V2-NEXT: granulated_lds_size = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_exception = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_exception = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_buffer = 1			; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_buffer = 1
	; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_ptr = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_ptr = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_queue_ptr = 1			; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_queue_ptr = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1			; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1
	; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_id = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_dispatch_id = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_flat_scratch_init = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_flat_scratch_init = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_size = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_private_segment_size = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_wavefront_size32 = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_wavefront_size32 = 0
	; HSA-TRAP-GFX803-V2-NEXT: enable_ordered_append_gds = 0			; HSA-TRAP-GFX803-V2-NEXT: enable_ordered_append_gds = 0
	; HSA-TRAP-GFX803-V2-NEXT: private_element_size = 1			; HSA-TRAP-GFX803-V2-NEXT: private_element_size = 1
	; HSA-TRAP-GFX803-V2-NEXT: is_ptr64 = 1			; HSA-TRAP-GFX803-V2-NEXT: is_ptr64 = 1
	; HSA-TRAP-GFX803-V2-NEXT: is_dynamic_callstack = 0			; HSA-TRAP-GFX803-V2-NEXT: is_dynamic_callstack = 0
	; HSA-TRAP-GFX803-V2-NEXT: is_debug_enabled = 0			; HSA-TRAP-GFX803-V2-NEXT: is_debug_enabled = 0
	; HSA-TRAP-GFX803-V2-NEXT: is_xnack_enabled = 0			; HSA-TRAP-GFX803-V2-NEXT: is_xnack_enabled = 0
	; HSA-TRAP-GFX803-V2-NEXT: workitem_private_segment_byte_size = 0			; HSA-TRAP-GFX803-V2-NEXT: workitem_private_segment_byte_size = 0
	; HSA-TRAP-GFX803-V2-NEXT: workgroup_group_segment_byte_size = 0			; HSA-TRAP-GFX803-V2-NEXT: workgroup_group_segment_byte_size = 0
	; HSA-TRAP-GFX803-V2-NEXT: gds_segment_byte_size = 0			; HSA-TRAP-GFX803-V2-NEXT: gds_segment_byte_size = 0
	; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_byte_size = 8			; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_byte_size = 8
	; HSA-TRAP-GFX803-V2-NEXT: workgroup_fbarrier_count = 0			; HSA-TRAP-GFX803-V2-NEXT: workgroup_fbarrier_count = 0
	; HSA-TRAP-GFX803-V2-NEXT: wavefront_sgpr_count = 8			; HSA-TRAP-GFX803-V2-NEXT: wavefront_sgpr_count = 6
	; HSA-TRAP-GFX803-V2-NEXT: workitem_vgpr_count = 4			; HSA-TRAP-GFX803-V2-NEXT: workitem_vgpr_count = 4
	; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_first = 0			; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_first = 0
	; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_count = 0			; HSA-TRAP-GFX803-V2-NEXT: reserved_vgpr_count = 0
	; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_first = 0			; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_first = 0
	; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_count = 0			; HSA-TRAP-GFX803-V2-NEXT: reserved_sgpr_count = 0
	; HSA-TRAP-GFX803-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0			; HSA-TRAP-GFX803-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
	; HSA-TRAP-GFX803-V2-NEXT: debug_private_segment_buffer_sgpr = 0			; HSA-TRAP-GFX803-V2-NEXT: debug_private_segment_buffer_sgpr = 0
	; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_alignment = 4			; HSA-TRAP-GFX803-V2-NEXT: kernarg_segment_alignment = 4
	; HSA-TRAP-GFX803-V2-NEXT: group_segment_alignment = 4			; HSA-TRAP-GFX803-V2-NEXT: group_segment_alignment = 4
	; HSA-TRAP-GFX803-V2-NEXT: private_segment_alignment = 4			; HSA-TRAP-GFX803-V2-NEXT: private_segment_alignment = 4
	; HSA-TRAP-GFX803-V2-NEXT: wavefront_size = 6			; HSA-TRAP-GFX803-V2-NEXT: wavefront_size = 6
	; HSA-TRAP-GFX803-V2-NEXT: call_convention = -1			; HSA-TRAP-GFX803-V2-NEXT: call_convention = -1
	; HSA-TRAP-GFX803-V2-NEXT: runtime_loader_kernel_symbol = 0			; HSA-TRAP-GFX803-V2-NEXT: runtime_loader_kernel_symbol = 0
	; HSA-TRAP-GFX803-V2-NEXT: .end_amd_kernel_code_t			; HSA-TRAP-GFX803-V2-NEXT: .end_amd_kernel_code_t
	; HSA-TRAP-GFX803-V2-NEXT: ; %bb.0:			; HSA-TRAP-GFX803-V2-NEXT: ; %bb.0:
	; HSA-TRAP-GFX803-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; HSA-TRAP-GFX803-V2-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v2, 1			; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v2, 1
	; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v3, 2			; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v3, 2
	; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt lgkmcnt(0)			; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt lgkmcnt(0)
	; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v0, s0			; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v0, s0
	; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v1, s1			; HSA-TRAP-GFX803-V2-NEXT: v_mov_b32_e32 v1, s1
	; HSA-TRAP-GFX803-V2-NEXT: flat_store_dword v[0:1], v2			; HSA-TRAP-GFX803-V2-NEXT: flat_store_dword v[0:1], v2
	; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0)			; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0)
	; HSA-TRAP-GFX803-V2-NEXT: s_trap 3			; HSA-TRAP-GFX803-V2-NEXT: s_trap 3
	; HSA-TRAP-GFX803-V2-NEXT: flat_store_dword v[0:1], v3			; HSA-TRAP-GFX803-V2-NEXT: flat_store_dword v[0:1], v3
	; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0)			; HSA-TRAP-GFX803-V2-NEXT: s_waitcnt vmcnt(0)
	; HSA-TRAP-GFX803-V2-NEXT: s_endpgm			; HSA-TRAP-GFX803-V2-NEXT: s_endpgm
	;			;
	; HSA-TRAP-GFX803-V3-LABEL: debugtrap:			; HSA-TRAP-GFX803-V3-LABEL: debugtrap:
	; HSA-TRAP-GFX803-V3: ; %bb.0:			; HSA-TRAP-GFX803-V3: ; %bb.0:
	; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v2, 1			; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v2, 1
	; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v3, 2			; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v3, 2
	; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt lgkmcnt(0)			; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt lgkmcnt(0)
	; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0			; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0
	; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s1			; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s1
	; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v2			; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v2
	; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0)			; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0)
	; HSA-TRAP-GFX803-V3-NEXT: s_trap 3			; HSA-TRAP-GFX803-V3-NEXT: s_trap 3
	; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v3			; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v3
	; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0)			; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0)
	; HSA-TRAP-GFX803-V3-NEXT: s_endpgm			; HSA-TRAP-GFX803-V3-NEXT: s_endpgm
	;			;
	; HSA-TRAP-GFX803-V4-LABEL: debugtrap:			; HSA-TRAP-GFX803-V4-LABEL: debugtrap:
	; HSA-TRAP-GFX803-V4: ; %bb.0:			; HSA-TRAP-GFX803-V4: ; %bb.0:
	; HSA-TRAP-GFX803-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; HSA-TRAP-GFX803-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v2, 1			; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v2, 1
	; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v3, 2			; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v3, 2
	; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt lgkmcnt(0)			; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt lgkmcnt(0)
	; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0			; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0
	; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s1			; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s1
	; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v2			; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v2
	; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0)			; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0)
	; HSA-TRAP-GFX803-V4-NEXT: s_trap 3			; HSA-TRAP-GFX803-V4-NEXT: s_trap 3
	Show All 18 Lines
	; HSA-TRAP-GFX900-V2-NEXT: priv = 0			; HSA-TRAP-GFX900-V2-NEXT: priv = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1			; HSA-TRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1
	; HSA-TRAP-GFX900-V2-NEXT: debug_mode = 0			; HSA-TRAP-GFX900-V2-NEXT: debug_mode = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1			; HSA-TRAP-GFX900-V2-NEXT: enable_ieee_mode = 1
	; HSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_wgp_mode = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_mem_ordered = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_fwd_progress = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
	; HSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 8			; HSA-TRAP-GFX900-V2-NEXT: user_sgpr_count = 6
	; HSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_trap_handler = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1			; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1
	; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_exception_msb = 0
	; HSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0			; HSA-TRAP-GFX900-V2-NEXT: granulated_lds_size = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_exception = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_exception = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1			; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1
	; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1			; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1			; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1
	; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0
	; HSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0			; HSA-TRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0
	; HSA-TRAP-GFX900-V2-NEXT: private_element_size = 1			; HSA-TRAP-GFX900-V2-NEXT: private_element_size = 1
	; HSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1			; HSA-TRAP-GFX900-V2-NEXT: is_ptr64 = 1
	; HSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0			; HSA-TRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0
	; HSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0			; HSA-TRAP-GFX900-V2-NEXT: is_debug_enabled = 0
	; HSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1			; HSA-TRAP-GFX900-V2-NEXT: is_xnack_enabled = 1
	; HSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0			; HSA-TRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0
	; HSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0			; HSA-TRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0
	; HSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0			; HSA-TRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
	; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8			; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
	; HSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0			; HSA-TRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
	; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8			; HSA-TRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 6
	; HSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 3			; HSA-TRAP-GFX900-V2-NEXT: workitem_vgpr_count = 3
	; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0			; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
	; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0			; HSA-TRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
	; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0			; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0
	; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0			; HSA-TRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0
	; HSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0			; HSA-TRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
	; HSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0			; HSA-TRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0
	; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4			; HSA-TRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4
	; HSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4			; HSA-TRAP-GFX900-V2-NEXT: group_segment_alignment = 4
	; HSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4			; HSA-TRAP-GFX900-V2-NEXT: private_segment_alignment = 4
	; HSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6			; HSA-TRAP-GFX900-V2-NEXT: wavefront_size = 6
	; HSA-TRAP-GFX900-V2-NEXT: call_convention = -1			; HSA-TRAP-GFX900-V2-NEXT: call_convention = -1
	; HSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0			; HSA-TRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0
	; HSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t			; HSA-TRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t
	; HSA-TRAP-GFX900-V2-NEXT: ; %bb.0:			; HSA-TRAP-GFX900-V2-NEXT: ; %bb.0:
	; HSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; HSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0			; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0
	; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1			; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1
	; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v2, 2			; HSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v2, 2
	; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0)			; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0)
	; HSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1]			; HSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1]
	; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)			; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
	; HSA-TRAP-GFX900-V2-NEXT: s_trap 3			; HSA-TRAP-GFX900-V2-NEXT: s_trap 3
	; HSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v2, s[0:1]			; HSA-TRAP-GFX900-V2-NEXT: global_store_dword v0, v2, s[0:1]
	; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)			; HSA-TRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
	; HSA-TRAP-GFX900-V2-NEXT: s_endpgm			; HSA-TRAP-GFX900-V2-NEXT: s_endpgm
	;			;
	; HSA-TRAP-GFX900-V3-LABEL: debugtrap:			; HSA-TRAP-GFX900-V3-LABEL: debugtrap:
	; HSA-TRAP-GFX900-V3: ; %bb.0:			; HSA-TRAP-GFX900-V3: ; %bb.0:
	; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0			; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
	; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1			; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1
	; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2			; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2
	; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)			; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
	; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]			; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
	; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)			; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
	; HSA-TRAP-GFX900-V3-NEXT: s_trap 3			; HSA-TRAP-GFX900-V3-NEXT: s_trap 3
	; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1]			; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1]
	; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)			; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
	; HSA-TRAP-GFX900-V3-NEXT: s_endpgm			; HSA-TRAP-GFX900-V3-NEXT: s_endpgm
	;			;
	; HSA-TRAP-GFX900-V4-LABEL: debugtrap:			; HSA-TRAP-GFX900-V4-LABEL: debugtrap:
	; HSA-TRAP-GFX900-V4: ; %bb.0:			; HSA-TRAP-GFX900-V4: ; %bb.0:
	; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0			; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
	; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1			; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1
	; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2			; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2
	; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)			; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
	; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]			; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
	; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)			; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
	; HSA-TRAP-GFX900-V4-NEXT: s_trap 3			; HSA-TRAP-GFX900-V4-NEXT: s_trap 3
	; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1]			; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1]
	Show All 17 Lines
	; HSA-NOTRAP-GFX900-V2-NEXT: priv = 0			; HSA-NOTRAP-GFX900-V2-NEXT: priv = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1			; HSA-NOTRAP-GFX900-V2-NEXT: enable_dx10_clamp = 1
	; HSA-NOTRAP-GFX900-V2-NEXT: debug_mode = 0			; HSA-NOTRAP-GFX900-V2-NEXT: debug_mode = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_ieee_mode = 1			; HSA-NOTRAP-GFX900-V2-NEXT: enable_ieee_mode = 1
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_wgp_mode = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_wgp_mode = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_mem_ordered = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_mem_ordered = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_fwd_progress = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_fwd_progress = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: user_sgpr_count = 8			; HSA-NOTRAP-GFX900-V2-NEXT: user_sgpr_count = 6
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_trap_handler = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_trap_handler = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1			; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_x = 1
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_y = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_id_z = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_workgroup_info = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_vgpr_workitem_id = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception_msb = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception_msb = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: granulated_lds_size = 0			; HSA-NOTRAP-GFX900-V2-NEXT: granulated_lds_size = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_exception = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1			; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_buffer = 1
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_ptr = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 1			; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_queue_ptr = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1			; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_kernarg_segment_ptr = 1
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_dispatch_id = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_flat_scratch_init = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_private_segment_size = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_x = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_y = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_sgpr_grid_workgroup_count_z = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_wavefront_size32 = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0			; HSA-NOTRAP-GFX900-V2-NEXT: enable_ordered_append_gds = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: private_element_size = 1			; HSA-NOTRAP-GFX900-V2-NEXT: private_element_size = 1
	; HSA-NOTRAP-GFX900-V2-NEXT: is_ptr64 = 1			; HSA-NOTRAP-GFX900-V2-NEXT: is_ptr64 = 1
	; HSA-NOTRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0			; HSA-NOTRAP-GFX900-V2-NEXT: is_dynamic_callstack = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: is_debug_enabled = 0			; HSA-NOTRAP-GFX900-V2-NEXT: is_debug_enabled = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: is_xnack_enabled = 1			; HSA-NOTRAP-GFX900-V2-NEXT: is_xnack_enabled = 1
	; HSA-NOTRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0			; HSA-NOTRAP-GFX900-V2-NEXT: workitem_private_segment_byte_size = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0			; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_group_segment_byte_size = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0			; HSA-NOTRAP-GFX900-V2-NEXT: gds_segment_byte_size = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8			; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_byte_size = 8
	; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0			; HSA-NOTRAP-GFX900-V2-NEXT: workgroup_fbarrier_count = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 8			; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_sgpr_count = 6
	; HSA-NOTRAP-GFX900-V2-NEXT: workitem_vgpr_count = 3			; HSA-NOTRAP-GFX900-V2-NEXT: workitem_vgpr_count = 3
	; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0			; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_first = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0			; HSA-NOTRAP-GFX900-V2-NEXT: reserved_vgpr_count = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0			; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_first = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0			; HSA-NOTRAP-GFX900-V2-NEXT: reserved_sgpr_count = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0			; HSA-NOTRAP-GFX900-V2-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0			; HSA-NOTRAP-GFX900-V2-NEXT: debug_private_segment_buffer_sgpr = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4			; HSA-NOTRAP-GFX900-V2-NEXT: kernarg_segment_alignment = 4
	; HSA-NOTRAP-GFX900-V2-NEXT: group_segment_alignment = 4			; HSA-NOTRAP-GFX900-V2-NEXT: group_segment_alignment = 4
	; HSA-NOTRAP-GFX900-V2-NEXT: private_segment_alignment = 4			; HSA-NOTRAP-GFX900-V2-NEXT: private_segment_alignment = 4
	; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_size = 6			; HSA-NOTRAP-GFX900-V2-NEXT: wavefront_size = 6
	; HSA-NOTRAP-GFX900-V2-NEXT: call_convention = -1			; HSA-NOTRAP-GFX900-V2-NEXT: call_convention = -1
	; HSA-NOTRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0			; HSA-NOTRAP-GFX900-V2-NEXT: runtime_loader_kernel_symbol = 0
	; HSA-NOTRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t			; HSA-NOTRAP-GFX900-V2-NEXT: .end_amd_kernel_code_t
	; HSA-NOTRAP-GFX900-V2-NEXT: ; %bb.0:			; HSA-NOTRAP-GFX900-V2-NEXT: ; %bb.0:
	; HSA-NOTRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; HSA-NOTRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0			; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0
	; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1			; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1
	; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v2, 2			; HSA-NOTRAP-GFX900-V2-NEXT: v_mov_b32_e32 v2, 2
	; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0)			; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0)
	; HSA-NOTRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1]			; HSA-NOTRAP-GFX900-V2-NEXT: global_store_dword v0, v1, s[0:1]
	; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)			; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
	; HSA-NOTRAP-GFX900-V2-NEXT: global_store_dword v0, v2, s[0:1]			; HSA-NOTRAP-GFX900-V2-NEXT: global_store_dword v0, v2, s[0:1]
	; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)			; HSA-NOTRAP-GFX900-V2-NEXT: s_waitcnt vmcnt(0)
	; HSA-NOTRAP-GFX900-V2-NEXT: s_endpgm			; HSA-NOTRAP-GFX900-V2-NEXT: s_endpgm
	;			;
	; HSA-NOTRAP-GFX900-V3-LABEL: debugtrap:			; HSA-NOTRAP-GFX900-V3-LABEL: debugtrap:
	; HSA-NOTRAP-GFX900-V3: ; %bb.0:			; HSA-NOTRAP-GFX900-V3: ; %bb.0:
	; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0			; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
	; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1			; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1
	; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2			; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2
	; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)			; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
	; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]			; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
	; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)			; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
	; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1]			; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1]
	; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)			; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
	; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm			; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm
	;			;
	; HSA-NOTRAP-GFX900-V4-LABEL: debugtrap:			; HSA-NOTRAP-GFX900-V4-LABEL: debugtrap:
	; HSA-NOTRAP-GFX900-V4: ; %bb.0:			; HSA-NOTRAP-GFX900-V4: ; %bb.0:
	; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0			; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
	; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0			; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
	; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1			; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1
	; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2			; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2
	; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)			; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
	; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]			; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
	; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)			; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
	; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1]			; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1]
	; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)			; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
	Show All 9 Lines

llvm/test/CodeGen/AMDGPU/trap.ll

Show First 20 Lines • Show All 45 Lines • ▼ Show 20 Lines	define amdgpu_kernel void @hsa_trap(i32 addrspace(1)* nocapture readonly %arg0) {
call void @llvm.trap()		call void @llvm.trap()
unreachable		unreachable
store volatile i32 2, i32 addrspace(1)* %arg0		store volatile i32 2, i32 addrspace(1)* %arg0
ret void		ret void
}		}

; MESA-TRAP: .section .AMDGPU.config		; MESA-TRAP: .section .AMDGPU.config
; MESA-TRAP: .long 47180		; MESA-TRAP: .long 47180
; MESA-TRAP-NEXT: .long 208		; MESA-TRAP-NEXT: .long 204

; NOMESA-TRAP: .section .AMDGPU.config		; NOMESA-TRAP: .section .AMDGPU.config
; NOMESA-TRAP: .long 47180		; NOMESA-TRAP: .long 47180
; NOMESA-TRAP-NEXT: .long 144		; NOMESA-TRAP-NEXT: .long 140

; GCN-LABEL: {{^}}hsa_debugtrap:		; GCN-LABEL: {{^}}hsa_debugtrap:
; HSA-TRAP: enable_trap_handler = 0		; HSA-TRAP: enable_trap_handler = 0
; HSA-TRAP: s_trap 3		; HSA-TRAP: s_trap 3
; HSA-TRAP: flat_store_dword v[0:1], v3		; HSA-TRAP: flat_store_dword v[0:1], v3

; for llvm.debugtrap in non-hsa path without ABI, generate a warning and a s_endpgm instruction		; for llvm.debugtrap in non-hsa path without ABI, generate a warning and a s_endpgm instruction
; NO-HSA-TRAP: enable_trap_handler = 0		; NO-HSA-TRAP: enable_trap_handler = 0
▲ Show 20 Lines • Show All 50 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU: Set up User SGPRs for queue_ptr only when necessary
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 414143

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll

llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll

llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll

llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll

llvm/test/CodeGen/AMDGPU/kernarg-size.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll

llvm/test/CodeGen/AMDGPU/trap-abis.ll

llvm/test/CodeGen/AMDGPU/trap.ll

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU: Set up User SGPRs for queue_ptr only when necessary
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 414143

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll

llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll

llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll

llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll

llvm/test/CodeGen/AMDGPU/kernarg-size.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll

llvm/test/CodeGen/AMDGPU/trap-abis.ll

llvm/test/CodeGen/AMDGPU/trap.ll

AMDGPU: Set up User SGPRs for queue_ptr only when necessary
ClosedPublic