Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1191,6 +1191,8 @@
     return AMDGPU::SI_SPILL_S128_SAVE;
   case 20:
     return AMDGPU::SI_SPILL_S160_SAVE;
+  case 24:
+    return AMDGPU::SI_SPILL_S192_SAVE;
   case 32:
     return AMDGPU::SI_SPILL_S256_SAVE;
   case 64:
@@ -1214,6 +1216,8 @@
     return AMDGPU::SI_SPILL_V128_SAVE;
   case 20:
     return AMDGPU::SI_SPILL_V160_SAVE;
+  case 24:
+    return AMDGPU::SI_SPILL_V192_SAVE;
   case 32:
     return AMDGPU::SI_SPILL_V256_SAVE;
   case 64:
@@ -1319,6 +1323,8 @@
     return AMDGPU::SI_SPILL_S128_RESTORE;
   case 20:
     return AMDGPU::SI_SPILL_S160_RESTORE;
+  case 24:
+    return AMDGPU::SI_SPILL_S192_RESTORE;
   case 32:
     return AMDGPU::SI_SPILL_S256_RESTORE;
   case 64:
@@ -1342,6 +1348,8 @@
     return AMDGPU::SI_SPILL_V128_RESTORE;
   case 20:
     return AMDGPU::SI_SPILL_V160_RESTORE;
+  case 24:
+    return AMDGPU::SI_SPILL_V192_RESTORE;
   case 32:
     return AMDGPU::SI_SPILL_V256_RESTORE;
   case 64:
Index: llvm/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -662,6 +662,7 @@
 defm SI_SPILL_S96 : SI_SPILL_SGPR <SReg_96>;
 defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
 defm SI_SPILL_S160 : SI_SPILL_SGPR <SReg_160>;
+defm SI_SPILL_S192 : SI_SPILL_SGPR <SReg_192>;
 defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
 defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
 defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;
@@ -701,6 +702,7 @@
 defm SI_SPILL_V96 : SI_SPILL_VGPR <VReg_96>;
 defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
 defm SI_SPILL_V160 : SI_SPILL_VGPR <VReg_160>;
+defm SI_SPILL_V192 : SI_SPILL_VGPR <VReg_192>;
 defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
 defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
 defm SI_SPILL_V1024 : SI_SPILL_VGPR <VReg_1024>;
Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -556,6 +556,11 @@
   case AMDGPU::SI_SPILL_V256_SAVE:
   case AMDGPU::SI_SPILL_V256_RESTORE:
     return 8;
+  case AMDGPU::SI_SPILL_S192_SAVE:
+  case AMDGPU::SI_SPILL_S192_RESTORE:
+  case AMDGPU::SI_SPILL_V192_SAVE:
+  case AMDGPU::SI_SPILL_V192_RESTORE:
+    return 6;
   case AMDGPU::SI_SPILL_S160_SAVE:
   case AMDGPU::SI_SPILL_S160_RESTORE:
   case AMDGPU::SI_SPILL_V160_SAVE:
@@ -1203,6 +1208,7 @@
   case AMDGPU::SI_SPILL_S1024_SAVE:
   case AMDGPU::SI_SPILL_S512_SAVE:
   case AMDGPU::SI_SPILL_S256_SAVE:
+  case AMDGPU::SI_SPILL_S192_SAVE:
   case AMDGPU::SI_SPILL_S160_SAVE:
   case AMDGPU::SI_SPILL_S128_SAVE:
   case AMDGPU::SI_SPILL_S96_SAVE:
@@ -1212,6 +1218,7 @@
   case AMDGPU::SI_SPILL_S1024_RESTORE:
   case AMDGPU::SI_SPILL_S512_RESTORE:
   case AMDGPU::SI_SPILL_S256_RESTORE:
+  case AMDGPU::SI_SPILL_S192_RESTORE:
   case AMDGPU::SI_SPILL_S160_RESTORE:
   case AMDGPU::SI_SPILL_S128_RESTORE:
   case AMDGPU::SI_SPILL_S96_RESTORE:
@@ -1247,6 +1254,7 @@
   case AMDGPU::SI_SPILL_S1024_SAVE:
   case AMDGPU::SI_SPILL_S512_SAVE:
   case AMDGPU::SI_SPILL_S256_SAVE:
+  case AMDGPU::SI_SPILL_S192_SAVE:
   case AMDGPU::SI_SPILL_S160_SAVE:
   case AMDGPU::SI_SPILL_S128_SAVE:
   case AMDGPU::SI_SPILL_S96_SAVE:
@@ -1260,6 +1268,7 @@
   case AMDGPU::SI_SPILL_S1024_RESTORE:
   case AMDGPU::SI_SPILL_S512_RESTORE:
   case AMDGPU::SI_SPILL_S256_RESTORE:
+  case AMDGPU::SI_SPILL_S192_RESTORE:
   case AMDGPU::SI_SPILL_S160_RESTORE:
   case AMDGPU::SI_SPILL_S128_RESTORE:
   case AMDGPU::SI_SPILL_S96_RESTORE:
Index: llvm/test/CodeGen/AMDGPU/spill-wide-vgpr.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/spill-wide-vgpr.ll
@@ -0,0 +1,28 @@
+; RUN: llc -O0 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_kernel void @spill_vgpr_x5(i32 addrspace(1)* %out, i32 %in) #0 {
+  %wide.vgpr = call <5 x i32> asm sideeffect "; def $0", "=v" () #0
+  %cmp = icmp eq i32 %in, 0
+  br i1 %cmp, label %bb0, label %ret
+
+bb0:
+  call void asm sideeffect "; use $0", "v"(<5 x i32> %wide.vgpr) #0
+  br label %ret
+
+ret:
+  ret void
+}
+
+define amdgpu_kernel void @spill_vgpr_x6(i32 addrspace(1)* %out, i32 %in) #0 {
+  %wide.vgpr = call <6 x i32> asm sideeffect "; def $0", "=v" () #0
+  %cmp = icmp eq i32 %in, 0
+  br i1 %cmp, label %bb0, label %ret
+
+bb0:
+  call void asm sideeffect "; use $0", "v"(<6 x i32> %wide.vgpr) #0
+  br label %ret
+
+ret:
+  ret void
+}
+
Index: llvm/test/CodeGen/AMDGPU/spill192.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/spill192.mir
@@ -0,0 +1,104 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast,si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=EXPANDED %s
+
+# Make sure spill/restore of 192 bit registers works. We have to
+# settle for a MIR test for now since inlineasm fails without 192-bit
+# MVT.
+
+---
+name: spill_restore_sgpr192
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: $sgpr32
+body: |
+  ; SPILLED-LABEL: name: spill_restore_sgpr192
+  ; SPILLED: bb.0:
+  ; SPILLED: successors: %bb.1(0x80000000)
+  ; SPILLED: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+  ; SPILLED: SI_SPILL_S192_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 24 into %stack.0, align 4, addrspace 5)
+  ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; SPILLED: bb.1:
+  ; SPILLED: successors: %bb.2(0x80000000)
+  ; SPILLED: S_NOP 1
+  ; SPILLED: bb.2:
+  ; SPILLED: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 = SI_SPILL_S192_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 24 from %stack.0, align 4, addrspace 5)
+  ; SPILLED: S_NOP 0, implicit renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+  ; EXPANDED-LABEL: name: spill_restore_sgpr192
+  ; EXPANDED: bb.0:
+  ; EXPANDED: successors: %bb.1(0x80000000)
+  ; EXPANDED: liveins: $vgpr0
+  ; EXPANDED: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+  ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr4, 0, undef $vgpr0
+  ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr5, 1, $vgpr0
+  ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr6, 2, $vgpr0
+  ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr7, 3, $vgpr0
+  ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr8, 4, $vgpr0
+  ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr9, 5, $vgpr0
+  ; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; EXPANDED: bb.1:
+  ; EXPANDED: successors: %bb.2(0x80000000)
+  ; EXPANDED: liveins: $vgpr0
+  ; EXPANDED: S_NOP 1
+  ; EXPANDED: bb.2:
+  ; EXPANDED: liveins: $vgpr0
+  ; EXPANDED: $sgpr4 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+  ; EXPANDED: $sgpr5 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 1
+  ; EXPANDED: $sgpr6 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 2
+  ; EXPANDED: $sgpr7 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 3
+  ; EXPANDED: $sgpr8 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 4
+  ; EXPANDED: $sgpr9 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 5
+  ; EXPANDED: S_NOP 0, implicit renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+  bb.0:
+    S_NOP 0, implicit-def %0:sgpr_192
+    S_CBRANCH_SCC1 implicit undef $scc, %bb.1
+
+  bb.1:
+    S_NOP 1
+
+  bb.2:
+    S_NOP 0, implicit %0
+...
+
+---
+name: spill_restore_vgpr192
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: $sgpr32
+body: |
+  ; SPILLED-LABEL: name: spill_restore_vgpr192
+  ; SPILLED: bb.0:
+  ; SPILLED: successors: %bb.1(0x80000000)
+  ; SPILLED: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+  ; SPILLED: SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 24 into %stack.0, align 4, addrspace 5)
+  ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; SPILLED: bb.1:
+  ; SPILLED: successors: %bb.2(0x80000000)
+  ; SPILLED: S_NOP 1
+  ; SPILLED: bb.2:
+  ; SPILLED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5)
+  ; SPILLED: S_NOP 0, implicit renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+  ; EXPANDED-LABEL: name: spill_restore_vgpr192
+  ; EXPANDED: bb.0:
+  ; EXPANDED: successors: %bb.1(0x80000000)
+  ; EXPANDED: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+  ; EXPANDED: SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 24 into %stack.0, align 4, addrspace 5)
+  ; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; EXPANDED: bb.1:
+  ; EXPANDED: successors: %bb.2(0x80000000)
+  ; EXPANDED: S_NOP 1
+  ; EXPANDED: bb.2:
+  ; EXPANDED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5)
+  ; EXPANDED: S_NOP 0, implicit renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+  bb.0:
+    S_NOP 0, implicit-def %0:vreg_192
+    S_CBRANCH_SCC1 implicit undef $scc, %bb.1
+
+  bb.1:
+    S_NOP 1
+
+  bb.2:
+    S_NOP 0, implicit %0
+...