diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -764,12 +764,16 @@ break; } - if (AMDGPU::SReg_32RegClass.contains(Reg)) { + if (AMDGPU::SReg_32RegClass.contains(Reg) || + AMDGPU::SGPR_LO16RegClass.contains(Reg) || + AMDGPU::SGPR_HI16RegClass.contains(Reg)) { assert(!AMDGPU::TTMP_32RegClass.contains(Reg) && "trap handler registers should not be used"); IsSGPR = true; Width = 1; - } else if (AMDGPU::VGPR_32RegClass.contains(Reg)) { + } else if (AMDGPU::VGPR_32RegClass.contains(Reg) || + AMDGPU::VGPR_LO16RegClass.contains(Reg) || + AMDGPU::VGPR_HI16RegClass.contains(Reg)) { IsSGPR = false; Width = 1; } else if (AMDGPU::AGPR_32RegClass.contains(Reg)) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// def SGPRRegBank : RegisterBank<"SGPR", - [SReg_32, SReg_64, SReg_128, SReg_256, SReg_512, SReg_1024] + [SGPR_LO16, SReg_32, SReg_64, SReg_128, SReg_256, SReg_512, SReg_1024] >; def VGPRRegBank : RegisterBank<"VGPR", diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -100,7 +100,8 @@ unsigned Reg = CS.getReg(); MachineInstrSpan MIS(I, &SaveBlock); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, MVT::i32); TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC, TRI); @@ -133,7 +134,8 @@ if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) { for (const CalleeSavedInfo &CI : reverse(CSI)) { unsigned Reg = CI.getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, MVT::i32); TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI); assert(I != RestoreBlock.begin() && @@ -206,7 +208,8 @@ for (unsigned I = 0; CSRegs[I]; ++I) { unsigned Reg = CSRegs[I]; if (SavedRegs.test(Reg)) { - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, MVT::i32); int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC), TRI->getSpillAlignment(*RC), true); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1281,6 +1281,7 @@ static const TargetRegisterClass *const BaseClasses[] = { &AMDGPU::VGPR_LO16RegClass, &AMDGPU::VGPR_HI16RegClass, + &AMDGPU::SGPR_LO16RegClass, &AMDGPU::VGPR_32RegClass, &AMDGPU::SReg_32RegClass, &AMDGPU::AGPR_32RegClass, @@ -1375,6 +1376,8 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass( const TargetRegisterClass *SRC) const { switch (getRegSizeInBits(*SRC)) { + case 16: + return &AMDGPU::VGPR_LO16RegClass; case 32: return &AMDGPU::VGPR_32RegClass; case 64: @@ -1419,6 +1422,8 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass( const TargetRegisterClass *VRC) const { switch (getRegSizeInBits(*VRC)) { + case 16: + return &AMDGPU::SGPR_LO16RegClass; case 32: return &AMDGPU::SGPR_32RegClass; case 64: @@ -1795,6 +1800,7 @@ case AMDGPU::VGPR_HI16RegClassID: return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF)); case AMDGPU::SGPR_32RegClassID: + case AMDGPU::SGPR_LO16RegClassID: return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF)); } } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -253,10 +253,23 @@ // SGPR registers foreach Index = 0-105 in { - def SGPR#Index : - SIReg <"s"#Index, Index>, + def SGPR#Index#_LO16 : SIReg <"s"#Index#".l", Index>, DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)), !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]>; + + // This is a placeholder to fill high lane in mask. + def SGPR#Index#_HI16 : SIReg <"", Index> { + let isArtificial = 1; + } + + def SGPR#Index : + SIRegWithSubRegs <"s"#Index, [!cast("SGPR"#Index#"_LO16"), + !cast("SGPR"#Index#"_HI16")], + Index>, + DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)), + !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]> { + let SubRegIndices = [lo16, hi16]; + } } // VGPR registers @@ -317,6 +330,20 @@ // TODO: Do we need to set DwarfRegAlias on register tuples? +def SGPR_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, + (add (sequence "SGPR%u_LO16", 0, 105))> { + let AllocationPriority = 1; + let Size = 16; + let GeneratePressureSet = 0; +} + +def SGPR_HI16 : RegisterClass<"AMDGPU", [i16, f16], 16, + (add (sequence "SGPR%u_HI16", 0, 105))> { + let isAllocatable = 0; + let Size = 16; + let GeneratePressureSet = 0; +} + // SGPR 32-bit registers def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add (sequence "SGPR%u", 0, 105))> { diff --git a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir --- a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir +++ b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir @@ -43,7 +43,7 @@ ; GCN: DS_WRITE_B32_gfx9 $vgpr0, $vgpr3, 4, 0, implicit killed $m0, implicit $exec ; GCN: } ; GCN: S_NOP 0 - ; GCN: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit undef $sgpr0_sgpr1, implicit undef $sgpr10 { + ; GCN: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit undef $sgpr0_sgpr1, implicit undef $sgpr10 { ; GCN: $sgpr2 = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0, 0 ; GCN: $sgpr3 = S_LOAD_DWORD_SGPR undef $sgpr0_sgpr1, undef $sgpr10, 0, 0 ; GCN: } diff --git a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir --- a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir +++ b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir @@ -73,7 +73,7 @@ # (1) %0.sub0 + %0.sub0 and (2) %0.sub1 + %0.sub1 # Check that renaming (2) does not inadvertently rename (1). # CHECK-LABEL: name: test2 -# CHECK: INLINEASM &"", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_XEXEC_with_sub0 */, def undef %0.sub0, 327690 /* regdef:SReg_1_XEXEC_with_sub0 */, def dead %1.sub1, 2147483657 /* reguse tiedto:$0 */, undef %0.sub0(tied-def 3), 2147549193 /* reguse tiedto:$1 */, %1.sub1(tied-def 5) +# CHECK: INLINEASM &"", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def undef %0.sub0, 327690 /* regdef:SReg_1_with_sub0 */, def dead %1.sub1, 2147483657 /* reguse tiedto:$0 */, undef %0.sub0(tied-def 3), 2147549193 /* reguse tiedto:$1 */, %1.sub1(tied-def 5) name: test2 body: | bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir --- a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir @@ -33,7 +33,7 @@ ; CHECK: dead %9:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; CHECK: undef %11.sub1:vreg_512 = COPY [[COPY]].sub1 - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def dead [[COPY1]], 851978 /* regdef:VRegOrLds_32 */, def dead [[COPY]].sub1, 2147483657 /* reguse tiedto:$0 */, [[COPY1]], 2147549193 /* reguse tiedto:$1 */, [[COPY]].sub1 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def dead [[COPY1]], 851978 /* regdef:SGPR_LO16 */, def dead [[COPY]].sub1, 2147483657 /* reguse tiedto:$0 */, [[COPY1]], 2147549193 /* reguse tiedto:$1 */, [[COPY]].sub1 ; CHECK: %11.sub0:vreg_512 = COPY [[COPY]].sub0 ; CHECK: %11.sub3:vreg_512 = COPY [[COPY]].sub3 ; CHECK: dead %10:vgpr_32 = V_ADD_I32_e32 4, [[V_MOV_B32_e32_1]], implicit-def dead $vcc, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir --- a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir @@ -36,18 +36,18 @@ ; CHECK: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def dead %11 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def dead %11 ; CHECK: GLOBAL_STORE_DWORD undef %12:vreg_64, [[BUFFER_LOAD_DWORD_OFFEN]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; CHECK: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3) - ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def %15, 851978 /* regdef:VRegOrLds_32 */, def %16 + ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %15, 851978 /* regdef:SGPR_LO16 */, def %16 ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec ; CHECK: [[DS_READ_B32_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec ; CHECK: [[DS_READ_B32_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec - ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def %21, 851978 /* regdef:VRegOrLds_32 */, def %22 + ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %21, 851978 /* regdef:SGPR_LO16 */, def %22 ; CHECK: [[DS_READ_B32_gfx9_3:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def dead [[V_MOV_B32_e32_2]], 851978 /* regdef:VRegOrLds_32 */, def dead [[V_MOV_B32_e32_3]], 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B64_gfx9_]].sub0, 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193 /* reguse tiedto:$1 */, [[V_MOV_B32_e32_3]](tied-def 5), 851977 /* reguse:VRegOrLds_32 */, %15, 851977 /* reguse:VRegOrLds_32 */, %16, 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B32_gfx9_1]], 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B32_gfx9_]], 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B32_gfx9_3]], 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B32_gfx9_2]] + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def dead [[V_MOV_B32_e32_2]], 851978 /* regdef:SGPR_LO16 */, def dead [[V_MOV_B32_e32_3]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B64_gfx9_]].sub0, 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193 /* reguse tiedto:$1 */, [[V_MOV_B32_e32_3]](tied-def 5), 851977 /* reguse:SGPR_LO16 */, %15, 851977 /* reguse:SGPR_LO16 */, %16, 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_1]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_3]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_2]] ; CHECK: %5.sub1:vreg_64 = COPY [[V_MOV_B32_e32_]] ; CHECK: DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3) ; CHECK: DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir b/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir --- a/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir +++ b/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir @@ -25,9 +25,9 @@ ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B32_gfx9_]] - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def undef %0.sub0, 851978 /* regdef:VRegOrLds_32 */, def undef %0.sub1 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_]] + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub0, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub1 ; CHECK: S_NOP 0, implicit %0.sub1 ; CHECK: $sgpr10 = S_MOV_B32 -1 ; CHECK: S_BRANCH %bb.1 @@ -63,9 +63,9 @@ ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B32_gfx9_]] - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def undef %0.sub1, 851978 /* regdef:VRegOrLds_32 */, def undef %0.sub0 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_]] + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub1, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub0 ; CHECK: S_NOP 0, implicit %0.sub1 ; CHECK: $sgpr10 = S_MOV_B32 -1 ; CHECK: S_BRANCH %bb.1