AMDGPU: diagnose private (scratch) stack sizes that do not fit in 32 bits.

Widens PrivateSegmentSize, ScratchSize and CalleeFrameSize from uint32_t to
uint64_t, emits a DS_Error stack-size diagnostic when ProgInfo.ScratchSize
fails isUInt<32>, and adds test/CodeGen/AMDGPU/stack-size-overflow.ll.

Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -41,7 +41,7 @@
     // the end are tracked separately.
     int32_t NumVGPR = 0;
     int32_t NumExplicitSGPR = 0;
-    uint32_t PrivateSegmentSize = 0;
+    uint64_t PrivateSegmentSize = 0;
     bool UsesVCC = false;
     bool UsesFlatScratch = false;
     bool HasDynamicallySizedStack = false;
@@ -61,7 +61,7 @@
     uint32_t DX10Clamp = 0;
     uint32_t DebugMode = 0;
     uint32_t IEEEMode = 0;
-    uint32_t ScratchSize = 0;
+    uint64_t ScratchSize = 0;
 
     uint64_t ComputePGMRSrc1 = 0;
 
@@ -144,7 +144,7 @@
                        const SIProgramInfo &KernelInfo);
   void emitCommonFunctionComments(uint32_t NumVGPR,
                                   uint32_t NumSGPR,
-                                  uint32_t ScratchSize,
+                                  uint64_t ScratchSize,
                                   uint64_t CodeSize);
 
 public:
Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -262,7 +262,7 @@
 void AMDGPUAsmPrinter::emitCommonFunctionComments(
   uint32_t NumVGPR,
   uint32_t NumSGPR,
-  uint32_t ScratchSize,
+  uint64_t ScratchSize,
   uint64_t CodeSize) {
   OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
   OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
@@ -600,7 +600,7 @@
 
   int32_t MaxVGPR = -1;
   int32_t MaxSGPR = -1;
-  uint32_t CalleeFrameSize = 0;
+  uint64_t CalleeFrameSize = 0;
 
   for (const MachineBasicBlock &MBB : MF) {
     for (const MachineInstr &MI : MBB) {
@@ -718,7 +718,7 @@
           MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
           MaxVGPR = std::max(MaxVGPR, 23);
 
-          CalleeFrameSize = std::max(CalleeFrameSize, 16384u);
+          CalleeFrameSize = std::max(CalleeFrameSize, UINT64_C(16384));
           Info.UsesVCC = true;
           Info.UsesFlatScratch = ST.hasFlatAddressSpace();
           Info.HasDynamicallySizedStack = true;
@@ -763,6 +763,14 @@
   ProgInfo.FlatUsed = Info.UsesFlatScratch;
   ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
 
+  // The scratch size is tracked in 64 bits; report an error for any value
+  // that does not fit in 32 bits.
+  if (!isUInt<32>(ProgInfo.ScratchSize)) {
+    DiagnosticInfoStackSize DiagStackSize(*MF.getFunction(),
+                                          ProgInfo.ScratchSize, DS_Error);
+    MF.getFunction()->getContext().diagnose(DiagStackSize);
+  }
+
   const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   const SIInstrInfo *TII = STM.getInstrInfo();
Index: test/CodeGen/AMDGPU/stack-size-overflow.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/stack-size-overflow.ll
@@ -0,0 +1,17 @@
+; RUN: not llc -march=amdgcn < %s 2>&1 | FileCheck -check-prefix=ERROR %s
+; RUN: not llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s
+
+; A private stack frame whose size does not fit in 32 bits must be reported
+; as an error, while the ScratchSize comment still prints the full value.
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #1
+
+; ERROR: error: stack size limit exceeded (4294967296) in stack_size_limit
+; GCN: ; ScratchSize: 4294967296
+define amdgpu_kernel void @stack_size_limit() #0 {
+entry:
+  %alloca = alloca [1073741823 x i32], align 4
+  %bc = bitcast [1073741823 x i32]* %alloca to i8*
+  call void @llvm.memset.p0i8.i32(i8* %bc, i8 9, i32 1073741823, i32 1, i1 true)
+  ret void
+}