Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1191,6 +1191,8 @@ return AMDGPU::SI_SPILL_S128_SAVE; case 20: return AMDGPU::SI_SPILL_S160_SAVE; + case 24: + return AMDGPU::SI_SPILL_S192_SAVE; case 32: return AMDGPU::SI_SPILL_S256_SAVE; case 64: @@ -1214,6 +1216,8 @@ return AMDGPU::SI_SPILL_V128_SAVE; case 20: return AMDGPU::SI_SPILL_V160_SAVE; + case 24: + return AMDGPU::SI_SPILL_V192_SAVE; case 32: return AMDGPU::SI_SPILL_V256_SAVE; case 64: @@ -1319,6 +1323,8 @@ return AMDGPU::SI_SPILL_S128_RESTORE; case 20: return AMDGPU::SI_SPILL_S160_RESTORE; + case 24: + return AMDGPU::SI_SPILL_S192_RESTORE; case 32: return AMDGPU::SI_SPILL_S256_RESTORE; case 64: @@ -1342,6 +1348,8 @@ return AMDGPU::SI_SPILL_V128_RESTORE; case 20: return AMDGPU::SI_SPILL_V160_RESTORE; + case 24: + return AMDGPU::SI_SPILL_V192_RESTORE; case 32: return AMDGPU::SI_SPILL_V256_RESTORE; case 64: Index: llvm/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstructions.td +++ llvm/lib/Target/AMDGPU/SIInstructions.td @@ -662,6 +662,7 @@ defm SI_SPILL_S96 : SI_SPILL_SGPR <SReg_96>; defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>; defm SI_SPILL_S160 : SI_SPILL_SGPR <SReg_160>; +defm SI_SPILL_S192 : SI_SPILL_SGPR <SReg_192>; defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>; defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>; defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>; @@ -701,6 +702,7 @@ defm SI_SPILL_V96 : SI_SPILL_VGPR <VReg_96>; defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>; defm SI_SPILL_V160 : SI_SPILL_VGPR <VReg_160>; +defm SI_SPILL_V192 : SI_SPILL_VGPR <VReg_192>; defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>; defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>; defm SI_SPILL_V1024 : SI_SPILL_VGPR <VReg_1024>; Index: llvm/test/CodeGen/AMDGPU/spill-wide-vgpr.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/spill-wide-vgpr.ll @@ -0,0 +1,28 @@ +; 
RUN: llc -O0 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; GCN-LABEL: {{^}}spill_vgpr_x5: +define amdgpu_kernel void @spill_vgpr_x5(i32 addrspace(1)* %out, i32 %in) #0 { + %wide.vgpr = call <5 x i32> asm sideeffect "; def $0", "=v" () #0 + %cmp = icmp eq i32 %in, 0 + br i1 %cmp, label %bb0, label %ret + +bb0: + call void asm sideeffect "; use $0", "v"(<5 x i32> %wide.vgpr) #0 + br label %ret + +ret: + ret void +} +; GCN-LABEL: {{^}}spill_vgpr_x6: +define amdgpu_kernel void @spill_vgpr_x6(i32 addrspace(1)* %out, i32 %in) #0 { + %wide.vgpr = call <6 x i32> asm sideeffect "; def $0", "=v" () #0 + %cmp = icmp eq i32 %in, 0 + br i1 %cmp, label %bb0, label %ret + +bb0: + call void asm sideeffect "; use $0", "v"(<6 x i32> %wide.vgpr) #0 + br label %ret + +ret: + ret void +} + Index: llvm/test/CodeGen/AMDGPU/spill192.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/spill192.mir @@ -0,0 +1,66 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast -o - %s | FileCheck -check-prefix=GCN %s + +# Make sure spill/restore of 192-bit registers works. We have to +# settle for a MIR test for now since inline asm fails without a 192-bit +# MVT. 
+# %0:sgpr_192 is defined in bb.0 and used in bb.2; the checks expect an SI_SPILL_S192_SAVE / SI_SPILL_S192_RESTORE pair. +--- +name: spill_restore_sgpr192 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + ; GCN-LABEL: name: spill_restore_sgpr192 + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; GCN: SI_SPILL_S192_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 24 into %stack.0, align 4, addrspace 5) + ; GCN: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GCN: bb.1: + ; GCN: successors: %bb.2(0x80000000) + ; GCN: S_NOP 1 + ; GCN: bb.2: + ; GCN: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 = SI_SPILL_S192_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 24 from %stack.0, align 4, addrspace 5) + ; GCN: S_NOP 0, implicit renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + bb.0: + S_NOP 0, implicit-def %0:sgpr_192 + S_CBRANCH_SCC1 implicit undef $scc, %bb.1 + + bb.1: + S_NOP 1 + + bb.2: + S_NOP 0, implicit %0 +... 
+# %0:vreg_192 is defined in bb.0 and used in bb.2; the checks expect an SI_SPILL_V192_SAVE / SI_SPILL_V192_RESTORE pair. +--- +name: spill_restore_vgpr192 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + ; GCN-LABEL: name: spill_restore_vgpr192 + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; GCN: SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 24 into %stack.0, align 4, addrspace 5) + ; GCN: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GCN: bb.1: + ; GCN: successors: %bb.2(0x80000000) + ; GCN: S_NOP 1 + ; GCN: bb.2: + ; GCN: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5) + ; GCN: S_NOP 0, implicit renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + bb.0: + S_NOP 0, implicit-def %0:vreg_192 + S_CBRANCH_SCC1 implicit undef $scc, %bb.1 + + bb.1: + S_NOP 1 + + bb.2: + S_NOP 0, implicit %0 +...