Index: llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp =================================================================== --- llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp +++ llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp @@ -236,7 +236,10 @@ MRI->getRegClass(Intf->reg()))); // Only evict older cascades or live ranges without a cascade. unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg()); - if (Cascade <= IntfCascade) { + if (Cascade == IntfCascade) + return false; + + if (Cascade < IntfCascade) { if (!Urgent) return false; // We permit breaking cascades for urgent evictions. It should be the Index: llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir @@ -0,0 +1,37 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: not llc -march=amdgcn -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,1 -o - 2>%t.err %s | FileCheck %s +# RUN: FileCheck -check-prefix=ERR %s < %t.err + +# This testcase cannot be compiled. An attempted eviction legality +# check was inconsistent with a later assertion when the eviction was +# performed. + +# ERR: error: ran out of registers during register allocation + +--- | + define void @foo() #0 { + ret void + } + + attributes #0 = { "amdgpu-waves-per-eu"="8,8" } + +... 
+ +# CHECK: S_NOP 0, implicit-def renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def dead renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def dead renamable $vgpr0_vgpr1_vgpr2_vgpr3 +# CHECK: S_NOP 0, implicit killed renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3 + +--- +name: foo +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + S_NOP 0, implicit-def %0:vreg_512, implicit-def %1:vreg_256, implicit-def %2:vreg_128, implicit-def %3:vreg_128, implicit-def %4:vreg_128 + + S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4 + S_ENDPGM 0 + +... Index: llvm/test/CodeGen/AMDGPU/regalloc-illegal-eviction-assert.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/regalloc-illegal-eviction-assert.ll @@ -0,0 +1,29 @@ +; RUN: not llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -o - %s 2>%t.err | FileCheck %s +; RUN: FileCheck -check-prefix=ERR %s < %t.err + +; This testcase would fail on an "illegal eviction". If the assert was +; relaxed to allow equivalent cascade numbers, it would loop infinitely. 
+ +; ERR: error: inline assembly requires more registers than available +; ERR: error: inline assembly requires more registers than available + +%asm.output = type { <16 x i32>, <8 x i32>, <5 x i32>, <4 x i32>, <16 x i32> } + +; CHECK-LABEL: {{^}}illegal_eviction_assert: +; CHECK: ; def v[4:19] v[20:27] v[0:4] v[0:3] a[0:15] +; CHECK: ; clobber +; CHECK: ; use v[4:19] v[20:27] v[0:4] v[0:3] a[1:16] +define void @illegal_eviction_assert(<32 x i32> addrspace(1)* %arg) #0 { + ;%agpr0 = call i32 asm sideeffect "; def $0","=${a0}"() + %asm = call %asm.output asm sideeffect "; def $0 $1 $2 $3 $4","=v,=v,=v,=v,={a[0:15]}"() + %vgpr0 = extractvalue %asm.output %asm, 0 + %vgpr1 = extractvalue %asm.output %asm, 1 + %vgpr2 = extractvalue %asm.output %asm, 2 + %vgpr3 = extractvalue %asm.output %asm, 3 + %agpr0 = extractvalue %asm.output %asm, 4 + call void asm sideeffect "; clobber", "~{v[0:31]}"() + call void asm sideeffect "; use $0 $1 $2 $3 $4","v,v,v,v,{a[1:16]}"(<16 x i32> %vgpr0, <8 x i32> %vgpr1, <5 x i32> %vgpr2, <4 x i32> %vgpr3, <16 x i32> %agpr0) + ret void +} + +attributes #0 = { "amdgpu-waves-per-eu"="8,8" }