Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -993,7 +993,9 @@ ProgInfo.FlatUsed = Info.UsesFlatScratch; ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion; - if (!isUInt<32>(ProgInfo.ScratchSize)) { + const uint64_t MaxScratchPerWorkitem = + GCNSubtarget::MaxWaveScratchSize / STM.getWavefrontSize(); + if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) { DiagnosticInfoStackSize DiagStackSize(MF.getFunction(), ProgInfo.ScratchSize, DS_Error); MF.getFunction().getContext().diagnose(DiagStackSize); Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -422,10 +422,10 @@ SITargetLowering TLInfo; SIFrameLowering FrameLowering; +public: // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword. static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1); -public: GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM); ~GCNSubtarget() override; Index: llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll +++ llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll @@ -3,12 +3,45 @@ declare void @llvm.memset.p5i8.i32(i8 addrspace(5)* nocapture, i8, i32, i32, i1) #1 -; ERROR: error: stack size limit exceeded (4294967296) in stack_size_limit -; GCN: ; ScratchSize: 4294967296 -define amdgpu_kernel void @stack_size_limit() #0 { +; ERROR: error: stack size limit exceeded (131061) in stack_size_limit_wave64 +; GCN: ; ScratchSize: 131061 +define amdgpu_kernel void @stack_size_limit_wave64() #0 { entry: - %alloca = alloca [1073741823 x i32], align 4, addrspace(5) - %bc = bitcast [1073741823 x i32] addrspace(5)* %alloca to i8 addrspace(5)* - call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %bc, i8 9, i32 1073741823, i32 1, i1 true) + %alloca = alloca [131057 x i8], align 1, addrspace(5) + %alloca.bc = bitcast [131057 x i8] addrspace(5)* %alloca to i8 addrspace(5)* + call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 131057, i32 1, i1 true) ret void } + +; ERROR: error: stack size limit exceeded (262117) in stack_size_limit_wave32 +; GCN: ; ScratchSize: 262117 +define amdgpu_kernel void @stack_size_limit_wave32() #1 { +entry: + %alloca = alloca [262113 x i8], align 1, addrspace(5) + %alloca.bc = bitcast [262113 x i8] addrspace(5)* %alloca to i8 addrspace(5)* + call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 262113, i32 1, i1 true) + ret void +} + +; ERROR-NOT: error: +; GCN: ; ScratchSize: 131056 +define amdgpu_kernel void @max_stack_size_wave64() #0 { +entry: + %alloca = alloca [131052 x i8], align 1, addrspace(5) + %alloca.bc = bitcast [131052 x i8] addrspace(5)* %alloca to i8 addrspace(5)* + call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 131052, i32 1, i1 true) + ret void +} + +; ERROR-NOT: error: +; GCN: ; ScratchSize: 262112 +define amdgpu_kernel void @max_stack_size_wave32() #1 { +entry: + %alloca = alloca [262108 x i8], align 1, addrspace(5) + %alloca.bc = bitcast [262108 x i8] addrspace(5)* %alloca to i8 addrspace(5)* + call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 262108, i32 1, i1 true) + ret void +} + +attributes #0 = { "target-cpu" = "gfx900" } +attributes #1 = { "target-cpu" = "gfx1010" }