Index: llvm/lib/CodeGen/RegAllocBase.h
===================================================================
--- llvm/lib/CodeGen/RegAllocBase.h
+++ llvm/lib/CodeGen/RegAllocBase.h
@@ -76,6 +76,8 @@
   /// always available for the remat of all the siblings of the original reg.
   SmallPtrSet DeadRemats;
 
+  SmallSet FailedVRegs;
+
   RegAllocBase(const RegClassFilterFunc F = allocateAllRegClasses)
       : ShouldAllocateClass(F) {}
 
@@ -92,6 +94,10 @@
   // rematerialization.
   virtual void postOptimization();
 
+  /// Perform cleanups on registers that failed to allocate. This adjusts the
+  /// liveness in order to avoid spurious verifier errors in later passes.
+  void cleanupFailedVRegs();
+
   // Get a temporary reference to a Spiller instance.
   virtual Spiller &spiller() = 0;
 
Index: llvm/lib/CodeGen/RegAllocBase.cpp
===================================================================
--- llvm/lib/CodeGen/RegAllocBase.cpp
+++ llvm/lib/CodeGen/RegAllocBase.cpp
@@ -63,6 +63,7 @@
   Matrix = &mat;
   MRI->freezeReservedRegs(vrm.getMachineFunction());
   RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
+  FailedVRegs.clear();
 }
 
 // Visit all the live registers. If they are already assigned to a physical
@@ -140,6 +141,8 @@
 
       // Keep going after reporting the error.
       VRM->assignVirt2Phys(VirtReg->reg(), AllocOrder.front());
+
+      FailedVRegs.insert(VirtReg->reg());
     } else if (AvailablePhysReg)
       Matrix->assign(*VirtReg, AvailablePhysReg);
 
@@ -173,6 +176,40 @@
     DeadRemats.clear();
 }
 
+void RegAllocBase::cleanupFailedVRegs() {
+  SmallSet JunkRegs;
+
+  for (Register FailedReg : FailedVRegs) {
+    JunkRegs.insert(FailedReg);
+
+    MCRegister PhysReg = VRM->getPhys(FailedReg);
+    LiveInterval &FailedInterval = LIS->getInterval(FailedReg);
+
+    // The liveness information for the failed register, and for any virtual
+    // register interfering with it on a register unit of the physical register
+    // we arbitrarily chose, is junk and needs to be cleaned up.
+    for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+      LiveIntervalUnion::Query &Q = Matrix->query(FailedInterval, *Units);
+      for (const LiveInterval *InterferingReg : Q.interferingVRegs())
+        JunkRegs.insert(InterferingReg->reg());
+    }
+  }
+
+  // TODO: Probably need to set undef on any physreg uses not associated with
+  // a virtual register.
+  for (Register JunkReg : JunkRegs) {
+    // We still should produce valid MIR. Mark every read of the register as
+    // undef and shrink its live range, so that later passes do not think it is
+    // possible to introduce kill flags which would fail the verifier.
+    for (MachineOperand &MO : MRI->reg_operands(JunkReg)) {
+      if (MO.readsReg())
+        MO.setIsUndef(true);
+    }
+
+    LIS->shrinkToUses(&LIS->getInterval(JunkReg));
+  }
+}
+
 void RegAllocBase::enqueue(const LiveInterval *LI) {
   const Register Reg = LI->reg();
 
Index: llvm/lib/CodeGen/RegAllocBasic.cpp
===================================================================
--- llvm/lib/CodeGen/RegAllocBasic.cpp
+++ llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -321,6 +321,7 @@
 
   allocatePhysRegs();
   postOptimization();
+  cleanupFailedVRegs();
 
   // Diagnostic output before rewriting
   LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
Index: llvm/lib/CodeGen/RegAllocGreedy.cpp
===================================================================
--- llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -2699,6 +2699,7 @@
   if (VerifyEnabled)
     MF->verify(this, "Before post optimization");
   postOptimization();
+  cleanupFailedVRegs();
   reportStats();
 
   releaseMemory();
Index: llvm/test/CodeGen/AMDGPU/issue48473.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/issue48473.mir
+++ llvm/test/CodeGen/AMDGPU/issue48473.mir
@@ -43,7 +43,8 @@
 # %25 to $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
 
 # CHECK-LABEL: name: issue48473
-# CHECK: S_NOP 0, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
+# CHECK: S_NOP 0, implicit undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
+
 
 ---
 name: issue48473
Index: llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
+++ llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
@@ -1,10 +1,7 @@
-# RUN: not llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs=0 -start-before=greedy,1 -stop-after=virtregrewriter,1 %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
-# RUN: not --crash llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,1 %s -o /dev/null 2>&1 | FileCheck -check-prefixes=ERR,VERIFIER %s
-
-# FIXME: We should not produce a verifier error after erroring
+# RUN: not llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,1 %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
 
 # ERR: error: inline assembly requires more registers than available
-# VERIFIER: *** Bad machine code: Using an undefined physical register ***
+# ERR-NOT: Bad machine code
 
 # This testcase cannot be compiled with the enforced register
 # budget. Previously, tryLastChanceRecoloring would assert here. It
Index: llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir
@@ -0,0 +1,62 @@
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -vgpr-regalloc=basic -sgpr-regalloc=basic -start-before=regallocbasic,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s 2> %t.basic.err | FileCheck -check-prefix=BASIC %s
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s 2> %t.greedy.err | FileCheck -check-prefix=GREEDY %s
+
+# RUN: FileCheck -check-prefix=ERR %s < %t.basic.err
+# RUN: FileCheck -check-prefix=ERR %s < %t.greedy.err
+
+# This testcase must fail register allocation. It should also not
+# produce a verifier error after doing so. Previously, the allocator
+# did not properly update the liveness for the dummy selected
+# register. As a result, VirtRegRewriter would incorrectly add kill
+# flags which, combined with other uses of the physical register,
+# produced a verifier error.
+
+# ERR: error: ran out of registers during register allocation
+# ERR-NEXT: error: ran out of registers during register allocation
+# ERR-NOT: ERROR
+
+
+# GREEDY: SI_SPILL_V256_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+# GREEDY-NEXT: SI_SPILL_V512_SAVE undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
+# GREEDY-NEXT: SI_SPILL_V128_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3
+
+# GREEDY: dead renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19 = SI_SPILL_V512_RESTORE
+# GREEDY: dead renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE
+# GREEDY: S_NOP 0, implicit undef renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3
+# GREEDY: S_NOP 0, implicit killed renamable $vgpr20_vgpr21
+
+
+# BASIC: SI_SPILL_V128_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3
+# BASIC: SI_SPILL_V256_SAVE killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+# BASIC: SI_SPILL_V512_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+# BASIC: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.3, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.3, align 4, addrspace 5)
+# BASIC: dead renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE
+# BASIC: renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = SI_SPILL_V256_RESTORE
+# BASIC: dead renamable $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE
+# BASIC: S_NOP 0, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit killed renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3
+# BASIC: renamable $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE
+
+--- |
+  define void @killed_reg_after_regalloc_failure() #0 {
+    ret void
+  }
+
+  attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
+
+...
+---
+name: killed_reg_after_regalloc_failure
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg: '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body: |
+  bb.0:
+    S_NOP 0, implicit-def %0:vreg_512, implicit-def %1:vreg_256, implicit-def %2:vreg_128
+    S_NOP 0, implicit-def %3:vreg_64
+    S_NOP 0, implicit %0, implicit %1, implicit %2
+    S_NOP 0, implicit %3
+    S_ENDPGM 0
+
+...
Index: llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll
@@ -0,0 +1,30 @@
+; RUN: not llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+; ERR: error: inline assembly requires more registers than available
+; ERR-NOT: ERROR
+; ERR-NOT: Bad machine code
+
+; This test requires respecting undef on the spill source operand when
+; expanding the pseudos to avoid all verifier errors.
+
+%asm.output = type { <16 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, <3 x i32> }
+
+define void @foo(<32 x i32> addrspace(1)* %arg) #0 {
+  %agpr0 = call i32 asm sideeffect "; def $0","=${a0}"()
+  %asm = call %asm.output asm sideeffect "; def $0 $1 $2 $3 $4","=v,=v,=v,=v,=v"()
+  %vgpr0 = extractvalue %asm.output %asm, 0
+  %vgpr1 = extractvalue %asm.output %asm, 1
+  %vgpr2 = extractvalue %asm.output %asm, 2
+  %vgpr3 = extractvalue %asm.output %asm, 3
+  %vgpr4 = extractvalue %asm.output %asm, 4
+  call void asm sideeffect "; clobber", "~{a[0:31]},~{v[0:31]}"()
+  call void asm sideeffect "; use $0","v"(<16 x i32> %vgpr0)
+  call void asm sideeffect "; use $0","v"(<8 x i32> %vgpr1)
+  call void asm sideeffect "; use $0","v"(<4 x i32> %vgpr2)
+  call void asm sideeffect "; use $0","v"(<3 x i32> %vgpr3)
+  call void asm sideeffect "; use $0","v"(<3 x i32> %vgpr4)
+  call void asm sideeffect "; use $0","{a1}"(i32 %agpr0)
+  ret void
+}
+
+attributes #0 = { "amdgpu-waves-per-eu"="8,8" }