Diff 307616

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h

Show First 20 Lines • Show All 72 Lines • ▼ Show 20 Lines	void findNumUsedRegistersSI(const MachineFunction &MF,
unsigned &NumVGPR) const;		unsigned &NumVGPR) const;

/// Emit register usage information so that the GPU driver		/// Emit register usage information so that the GPU driver
/// can correctly setup the GPU state.		/// can correctly setup the GPU state.
void EmitProgramInfoSI(const MachineFunction &MF,		void EmitProgramInfoSI(const MachineFunction &MF,
const SIProgramInfo &KernelInfo);		const SIProgramInfo &KernelInfo);
void EmitPALMetadata(const MachineFunction &MF,		void EmitPALMetadata(const MachineFunction &MF,
const SIProgramInfo &KernelInfo);		const SIProgramInfo &KernelInfo);
		void emitPALFunctionMetadata(const MachineFunction &MF);
void emitCommonFunctionComments(uint32_t NumVGPR,		void emitCommonFunctionComments(uint32_t NumVGPR,
Optional<uint32_t> NumAGPR,		Optional<uint32_t> NumAGPR,
uint32_t TotalNumVGPR,		uint32_t TotalNumVGPR,
uint32_t NumSGPR,		uint32_t NumSGPR,
uint64_t ScratchSize,		uint64_t ScratchSize,
uint64_t CodeSize,		uint64_t CodeSize,
const AMDGPUMachineFunction* MFI);		const AMDGPUMachineFunction* MFI);

▲ Show 20 Lines • Show All 65 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Show First 20 Lines • Show All 450 Lines • ▼ Show 20 Lines	bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
} else {		} else {
auto I = CallGraphResourceInfo.insert(		auto I = CallGraphResourceInfo.insert(
std::make_pair(&MF.getFunction(), SIFunctionResourceInfo()));		std::make_pair(&MF.getFunction(), SIFunctionResourceInfo()));
SIFunctionResourceInfo &Info = I.first->second;		SIFunctionResourceInfo &Info = I.first->second;
assert(I.second && "should only be called once per function");		assert(I.second && "should only be called once per function");
Info = analyzeResourceUsage(MF);		Info = analyzeResourceUsage(MF);
}		}

if (STM.isAmdPalOS() && MFI->isEntryFunction())		if (STM.isAmdPalOS()) {
		if (MFI->isEntryFunction())
EmitPALMetadata(MF, CurrentProgramInfo);		EmitPALMetadata(MF, CurrentProgramInfo);
else if (!STM.isAmdHsaOS()) {		else
		emitPALFunctionMetadata(MF);
		} else if (!STM.isAmdHsaOS()) {
EmitProgramInfoSI(MF, CurrentProgramInfo);		EmitProgramInfoSI(MF, CurrentProgramInfo);
}		}

DumpCodeInstEmitter = nullptr;		DumpCodeInstEmitter = nullptr;
if (STM.dumpCode()) {		if (STM.dumpCode()) {
// For -dumpcode, get the assembler out of the streamer, even if it does		// For -dumpcode, get the assembler out of the streamer, even if it does
// not really want to let us have it. This only works with -filetype=obj.		// not really want to let us have it. This only works with -filetype=obj.
bool SaveFlag = OutStreamer->getUseAssemblerInfoForParsing();		bool SaveFlag = OutStreamer->getUseAssemblerInfoForParsing();
▲ Show 20 Lines • Show All 785 Lines • ▼ Show 20 Lines	if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
MD->setSpiPsInputAddr(MFI->getPSInputAddr());		MD->setSpiPsInputAddr(MFI->getPSInputAddr());
}		}

const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();		const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
if (STM.isWave32())		if (STM.isWave32())
MD->setWave32(MF.getFunction().getCallingConv());		MD->setWave32(MF.getFunction().getCallingConv());
}		}

		// Emit per-function PAL metadata. runOnMachineFunction calls this on AMDPAL
		// for functions that are not entry functions. It records the stack size
		// reported by the function's MachineFrameInfo via setStackFrameSize().
		void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
		auto *MD = getTargetStreamer()->getPALMetadata();
		const MachineFrameInfo &MFI = MF.getFrameInfo();
		arsenmUnsubmitted Not Done Reply Inline Actions Seems like this should emit it for any non-entry convention for AMDPAL, not just AMDGPU_gfx arsenm: Seems like this should emit it for any non-entry convention for AMDPAL, not just AMDGPU_gfx
		MD->setStackFrameSize(MF, MFI.getStackSize());
		}

// This is supposed to be log2(Size)		// This is supposed to be log2(Size)
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {		static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
switch (Size) {		switch (Size) {
case 4:		case 4:
return AMD_ELEMENT_4_BYTES;		return AMD_ELEMENT_4_BYTES;
case 8:		case 8:
return AMD_ELEMENT_8_BYTES;		return AMD_ELEMENT_8_BYTES;
case 16:		case 16:
▲ Show 20 Lines • Show All 107 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h

Show All 9 Lines
/// PAL metadata handling		/// PAL metadata handling
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H		#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H		#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H

#include "llvm/BinaryFormat/MsgPackDocument.h"		#include "llvm/BinaryFormat/MsgPackDocument.h"
		#include "llvm/CodeGen/MachineFunction.h"

namespace llvm {		namespace llvm {

class Module;		class Module;
class StringRef;		class StringRef;

class AMDGPUPALMetadata {		class AMDGPUPALMetadata {
unsigned BlobType = 0;		unsigned BlobType = 0;
msgpack::Document MsgPackDoc;		msgpack::Document MsgPackDoc;
msgpack::DocNode Registers;		msgpack::DocNode Registers;
msgpack::DocNode HwStages;		msgpack::DocNode HwStages;
		msgpack::DocNode ShaderFunctions;

public:		public:
// Read the amdgpu.pal.metadata supplied by the frontend, ready for		// Read the amdgpu.pal.metadata supplied by the frontend, ready for
// per-function modification.		// per-function modification.
void readFromIR(Module &M);		void readFromIR(Module &M);

// Set PAL metadata from a binary blob from the applicable .note record.		// Set PAL metadata from a binary blob from the applicable .note record.
// Returns false if bad format. Blob must remain valid for the lifetime of		// Returns false if bad format. Blob must remain valid for the lifetime of
Show All 34 Lines	public:
// Set the number of used sgprs in the metadata. This is an optional advisory		// Set the number of used sgprs in the metadata. This is an optional advisory
// record for logging etc; wave dispatch actually uses the rsrc1 register for		// record for logging etc; wave dispatch actually uses the rsrc1 register for
// the shader stage to determine the number of sgprs to allocate.		// the shader stage to determine the number of sgprs to allocate.
void setNumUsedSgprs(unsigned CC, unsigned Val);		void setNumUsedSgprs(unsigned CC, unsigned Val);

// Set the scratch size in the metadata.		// Set the scratch size in the metadata.
void setScratchSize(unsigned CC, unsigned Val);		void setScratchSize(unsigned CC, unsigned Val);

		// Set the stack frame size of a function in the metadata.
		void setStackFrameSize(const MachineFunction &MF, unsigned Val);

// Set the hardware register bit in PAL metadata to enable wave32 on the		// Set the hardware register bit in PAL metadata to enable wave32 on the
// shader of the given calling convention.		// shader of the given calling convention.
void setWave32(unsigned CC);		void setWave32(unsigned CC);

// Emit the accumulated PAL metadata as asm directives.		// Emit the accumulated PAL metadata as asm directives.
// This is called from AMDGPUTargetAsmStreamer::Finish().		// This is called from AMDGPUTargetAsmStreamer::Finish().
void toString(std::string &S);		void toString(std::string &S);

Show All 27 Lines	private:
bool isLegacy() const;		bool isLegacy() const;

// Reference (create if necessary) the node for the registers map.		// Reference (create if necessary) the node for the registers map.
msgpack::DocNode &refRegisters();		msgpack::DocNode &refRegisters();

// Get (create if necessary) the registers map.		// Get (create if necessary) the registers map.
msgpack::MapDocNode getRegisters();		msgpack::MapDocNode getRegisters();

		// Reference (create if necessary) the node for the shader functions map.
		msgpack::DocNode &refShaderFunctions();

		// Get (create if necessary) the shader functions map.
		msgpack::MapDocNode getShaderFunctions();

// Get (create if necessary) the .hardware_stages entry for the given calling		// Get (create if necessary) the .hardware_stages entry for the given calling
// convention.		// convention.
msgpack::MapDocNode getHwStage(unsigned CC);		msgpack::MapDocNode getHwStage(unsigned CC);

bool setFromLegacyBlob(StringRef Blob);		bool setFromLegacyBlob(StringRef Blob);
bool setFromMsgPackBlob(StringRef Blob);		bool setFromMsgPackBlob(StringRef Blob);
void toLegacyBlob(std::string &Blob);		void toLegacyBlob(std::string &Blob);
void toMsgPackBlob(std::string &Blob);		void toMsgPackBlob(std::string &Blob);
};		};

} // end namespace llvm		} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H		#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H

llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp

Show First 20 Lines • Show All 232 Lines • ▼ Show 20 Lines	if (isLegacy()) {
// Old non-msgpack format.		// Old non-msgpack format.
setRegister(getScratchSizeKey(CC), Val);		setRegister(getScratchSizeKey(CC), Val);
return;		return;
}		}
// Msgpack format.		// Msgpack format.
getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val);		getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val);
}		}

		// Set the stack frame size of a function in the metadata. The value is stored
		// as ".stack_frame_size_in_bytes" in a fresh map node, which becomes the entry
		// for MF's function name in the shader functions map.
		void AMDGPUPALMetadata::setStackFrameSize(const MachineFunction &MF,
		unsigned Val) {
		auto Node = MsgPackDoc.getMapNode();
		Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val);
		// Assigning the whole node replaces any entry already recorded for this name.
		getShaderFunctions()[MF.getFunction().getName()] = Node;
		}

// Set the hardware register bit in PAL metadata to enable wave32 on the		// Set the hardware register bit in PAL metadata to enable wave32 on the
// shader of the given calling convention.		// shader of the given calling convention.
void AMDGPUPALMetadata::setWave32(unsigned CC) {		void AMDGPUPALMetadata::setWave32(unsigned CC) {
switch (CC) {		switch (CC) {
case CallingConv::AMDGPU_HS:		case CallingConv::AMDGPU_HS:
setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_HS_W32_EN(1));		setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_HS_W32_EN(1));
break;		break;
case CallingConv::AMDGPU_GS:		case CallingConv::AMDGPU_GS:
▲ Show 20 Lines • Show All 467 Lines • ▼ Show 20 Lines

// Get (create if necessary) the registers map.		// Get (create if necessary) the registers map.
msgpack::MapDocNode AMDGPUPALMetadata::getRegisters() {		msgpack::MapDocNode AMDGPUPALMetadata::getRegisters() {
if (Registers.isEmpty())		if (Registers.isEmpty())
Registers = refRegisters();		Registers = refRegisters();
return Registers.getMap();		return Registers.getMap();
}		}

		// Reference (create if necessary) the node for the shader functions map.
		// Walks root -> "amdpal.pipelines" -> element 0 -> ".shader_functions",
		// converting each level to the required map/array kind on the way down.
		msgpack::DocNode &AMDGPUPALMetadata::refShaderFunctions() {
		auto &N =
		MsgPackDoc.getRoot()
		.getMap(/*Convert=*/true)[MsgPackDoc.getNode("amdpal.pipelines")]
		.getArray(/*Convert=*/true)[0]
		.getMap(/*Convert=*/true)[MsgPackDoc.getNode(".shader_functions")];
		// Make sure the returned node is itself a map before handing it out.
		N.getMap(/*Convert=*/true);
		return N;
		}

		// Get (create if necessary) the shader functions map. The node handle is
		// cached in ShaderFunctions and looked up through refShaderFunctions() only
		// while that cached handle is still empty.
		msgpack::MapDocNode AMDGPUPALMetadata::getShaderFunctions() {
		if (!ShaderFunctions.isEmpty())
		return ShaderFunctions.getMap();
		ShaderFunctions = refShaderFunctions();
		return ShaderFunctions.getMap();
		}

// Return the PAL metadata hardware shader stage name.		// Return the PAL metadata hardware shader stage name.
static const char *getStageName(CallingConv::ID CC) {		static const char *getStageName(CallingConv::ID CC) {
switch (CC) {		switch (CC) {
case CallingConv::AMDGPU_PS:		case CallingConv::AMDGPU_PS:
return ".ps";		return ".ps";
case CallingConv::AMDGPU_VS:		case CallingConv::AMDGPU_VS:
return ".vs";		return ".vs";
case CallingConv::AMDGPU_GS:		case CallingConv::AMDGPU_GS:
▲ Show 20 Lines • Show All 55 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/amdpal-callable.ll

	; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -enable-var-scope %s			; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
	; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -enable-var-scope %s			; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
	; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s			; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
	; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s			; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL -enable-var-scope %s
				arsenmUnsubmitted Not Done Reply Inline Actions Should include a test with a non-0 size, and a case with a variable sized stack object, and a case with transitively used stack from a callee arsenm: Should include a test with a non-0 size, and a case with a variable sized stack object, and a…

	; GCN-LABEL: {{^}}gfx_callable_amdpal:			declare float @extern_func(float) #0
	; GCN: .amdgpu_pal_metadata			declare float @extern_func_many_args(<64 x float>) #0
	; GCN-NEXT: ---
	; GCN-NEXT: amdpal.pipelines:			@funcptr = external hidden unnamed_addr addrspace(4) constant void()*, align 4

				; Baseline case: no alloca and no call; returns %arg0 + 1.0.
				define amdgpu_gfx float @no_stack(float %arg0) #0 {
				%add = fadd float %arg0, 1.0
				ret float %add
				}

				; One alloca of four floats in the entry block, accessed with a volatile
				; store and load (presumably so the stack slot is not optimized away).
				define amdgpu_gfx float @simple_stack(float %arg0) #0 {
				%stack = alloca float, i32 4, align 4, addrspace(5)
				store volatile float 2.0, float addrspace(5)* %stack
				%val = load volatile float, float addrspace(5)* %stack
				%add = fadd float %arg0, %val
				ret float %add
				}

				; Two separate four-float allocas in the same block, each accessed with a
				; volatile store and load; both loaded values are added to %arg0.
				define amdgpu_gfx float @multiple_stack(float %arg0) #0 {
				%stack = alloca float, i32 4, align 4, addrspace(5)
				store volatile float 2.0, float addrspace(5)* %stack
				%val = load volatile float, float addrspace(5)* %stack
				%add = fadd float %arg0, %val
				%stack2 = alloca float, i32 4, align 4, addrspace(5)
				store volatile float 2.0, float addrspace(5)* %stack2
				%val2 = load volatile float, float addrspace(5)* %stack2
				%add2 = fadd float %add, %val2
				ret float %add2
				}

				; The alloca lives in bb1, which is only reached when %arg0 > 0.0, so it is
				; not in the entry block.
				define amdgpu_gfx float @dynamic_stack(float %arg0) #0 {
				bb0:
				%cmp = fcmp ogt float %arg0, 0.0
				br i1 %cmp, label %bb1, label %bb2

				bb1:
				%stack = alloca float, i32 4, align 4, addrspace(5)
				store volatile float 2.0, float addrspace(5)* %stack
				%val = load volatile float, float addrspace(5)* %stack
				%add = fadd float %arg0, %val
				br label %bb2

				bb2:
				%res = phi float [ 0.0, %bb0 ], [ %add, %bb1 ]
				ret float %res
				}

				; The alloca sits inside the loop block bb1. The back-edge is taken only
				; while %ctr > 0, and %ctr starts at 0 on entry from bb0.
				define amdgpu_gfx float @dynamic_stack_loop(float %arg0) #0 {
				bb0:
				br label %bb1

				bb1:
				%ctr = phi i32 [ 0, %bb0 ], [ %newctr, %bb1 ]
				%stack = alloca float, i32 4, align 4, addrspace(5)
				store volatile float 2.0, float addrspace(5)* %stack
				%val = load volatile float, float addrspace(5)* %stack
				%add = fadd float %arg0, %val
				%cmp = icmp sgt i32 %ctr, 0
				%newctr = sub i32 %ctr, 1
				br i1 %cmp, label %bb1, label %bb2

				bb2:
				ret float %add
				}

				; No alloca of its own; calls @simple_stack, which is defined in this file.
				define amdgpu_gfx float @no_stack_call(float %arg0) #0 {
				%res = call amdgpu_gfx float @simple_stack(float %arg0)
				ret float %res
				}

				; One four-float alloca plus a call to @simple_stack, which is defined in
				; this file.
				define amdgpu_gfx float @simple_stack_call(float %arg0) #0 {
				%stack = alloca float, i32 4, align 4, addrspace(5)
				store volatile float 2.0, float addrspace(5)* %stack
				%val = load volatile float, float addrspace(5)* %stack
				%res = call amdgpu_gfx float @simple_stack(float %arg0)
				%add = fadd float %res, %val
				ret float %add
				}

				; No alloca of its own; calls @extern_func, which is only declared here.
				define amdgpu_gfx float @no_stack_extern_call(float %arg0) #0 {
				%res = call amdgpu_gfx float @extern_func(float %arg0)
				ret float %res
				}

				; One four-float alloca plus a call to @extern_func, which is only declared
				; here.
				define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 {
				%stack = alloca float, i32 4, align 4, addrspace(5)
				store volatile float 2.0, float addrspace(5)* %stack
				%val = load volatile float, float addrspace(5)* %stack
				%res = call amdgpu_gfx float @extern_func(float %arg0)
				%add = fadd float %res, %val
				ret float %add
				}

				; No alloca of its own; forwards a <64 x float> argument to
				; @extern_func_many_args, which is only declared here.
				define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) #0 {
				%res = call amdgpu_gfx float @extern_func_many_args(<64 x float> %arg0)
				ret float %res
				}

				; No alloca of its own; calls through a function pointer loaded from the
				; external global @funcptr.
				define amdgpu_gfx float @no_stack_indirect_call(float %arg0) #0 {
				%fptr = load void()*, void()* addrspace(4)* @funcptr
				call amdgpu_gfx void %fptr()
				ret float %arg0
				}

				; One four-float alloca plus a call through a function pointer loaded from
				; the external global @funcptr.
				define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 {
				%stack = alloca float, i32 4, align 4, addrspace(5)
				store volatile float 2.0, float addrspace(5)* %stack
				%val = load volatile float, float addrspace(5)* %stack
				%fptr = load void()*, void()* addrspace(4)* @funcptr
				call amdgpu_gfx void %fptr()
				%add = fadd float %arg0, %val
				ret float %add
				}

				; One four-float alloca plus a direct call to itself (unconditional
				; recursion).
				define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
				%stack = alloca float, i32 4, align 4, addrspace(5)
				store volatile float 2.0, float addrspace(5)* %stack
				%val = load volatile float, float addrspace(5)* %stack
				%res = call amdgpu_gfx float @simple_stack_recurse(float %arg0)
				%add = fadd float %res, %val
				ret float %add
				}

				attributes #0 = { nounwind }

				; GCN: amdpal.pipelines:
	; GCN-NEXT: - .registers: {}			; GCN-NEXT: - .registers: {}
				; GCN-NEXT: .shader_functions:
				; GCN-NEXT: dynamic_stack:
				; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
				arsenmUnsubmitted Not Done Reply Inline Actions This doesn't really represent the unknown nature of the stack size, but I guess there isn't a key for this yet? arsenm: This doesn't really represent the unknown nature of the stack size, but I guess there isn't a…
				FlakebiAuthorUnsubmitted Not Done Reply Inline Actions I guess it should be the maximum that the function needs at any point, so 0x10 sounds right? Flakebi: I guess it should be the maximum that the function needs at any point, so 0x10 sounds right?
				foadUnsubmitted Not Done Reply Inline Actions Then what would it report if there was an alloca in a loop? foad: Then what would it report if there was an alloca in a loop?
				FlakebiAuthorUnsubmitted Not Done Reply Inline Actions The same as with an if, I think allocas in a loop don’t stack, so there should be a maximum stack usage that can be statically computed. Flakebi: The same as with an if, I think allocas in a loop don’t stack, so there should be a maximum…
				foadUnsubmitted Not Done Reply Inline Actions Yes they do stack. It's used for implementing standard(ish) C alloca(). foad: Yes they do stack. It's used for implementing standard(ish) C alloca().
				FlakebiAuthorUnsubmitted Not Done Reply Inline Actions Hm ok, I guess we’re doomed then. Flakebi: Hm ok, I guess we’re doomed then.
				nhaehnleUnsubmitted Not Done Reply Inline Actions For the use cases we care about in graphics, we should never have `alloca` outside of the function entry point, and therefore the stack frame size is always a constant. We could interpret "stack_frame_size_in_bytes" as the minimum stack frame size and add a boolean field "stack_frame_size_dynamic". We're not going to need it for a long time though. nhaehnle: For the use cases we care about in graphics, we should never have `alloca` outside of the…
				; GCN-NEXT: dynamic_stack_loop:
				; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
				; GCN-NEXT: multiple_stack:
				; GCN-NEXT: .stack_frame_size_in_bytes: 0x24{{$}}
				; GCN-NEXT: no_stack:
				; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
				; GCN-NEXT: no_stack_call:
				; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
				; GCN-NEXT: no_stack_extern_call:
				; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
				; GCN-NEXT: no_stack_extern_call_many_args:
				; SDAG-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
				; GISEL-NEXT: .stack_frame_size_in_bytes: 0xd0{{$}}
				; GCN-NEXT: no_stack_indirect_call:
				; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
				; GCN-NEXT: simple_stack:
				; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}}
				; GCN-NEXT: simple_stack_call:
				; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
				; GCN-NEXT: simple_stack_extern_call:
				; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
				; GCN-NEXT: simple_stack_indirect_call:
				; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
				; GCN-NEXT: simple_stack_recurse:
				; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
	; GCN-NEXT: ...			; GCN-NEXT: ...
				arsenmUnsubmitted Done Reply Inline Actions What do you mean unsupported? These do work (in SelectionDAG you can use a constant size alloca outside of the entry block to behave as-if) arsenm: What do you mean unsupported? These do work (in SelectionDAG you can use a constant size alloca…
	; GCN-NEXT: .end_amdgpu_pal_metadata
	; Minimal amdgpu_gfx callable: takes a half, adds 1.0 and returns the result.
	define amdgpu_gfx half @gfx_callable_amdpal(half %arg0) {
	%add = fadd half %arg0, 1.0
	ret half %add
	}

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Emit stack frame size in metadata
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 307616

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h

llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp

llvm/test/CodeGen/AMDGPU/amdpal-callable.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Emit stack frame size in metadata
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 307616

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h

llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp

llvm/test/CodeGen/AMDGPU/amdpal-callable.ll

[AMDGPU] Emit stack frame size in metadata
ClosedPublic