Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -776,7 +776,8 @@ AMDGPU::VGPR_HI16RegClass.contains(Reg)) { IsSGPR = false; Width = 1; - } else if (AMDGPU::AGPR_32RegClass.contains(Reg)) { + } else if (AMDGPU::AGPR_32RegClass.contains(Reg) || + AMDGPU::AGPR_LO16RegClass.contains(Reg)) { IsSGPR = false; IsAGPR = true; Width = 1; Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td +++ llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td @@ -18,5 +18,5 @@ def VCCRegBank : RegisterBank <"VCC", [SReg_1]>; def AGPRRegBank : RegisterBank <"AGPR", - [AGPR_32, AReg_64, AReg_128, AReg_512, AReg_1024] + [AGPR_LO16, AGPR_32, AReg_64, AReg_128, AReg_512, AReg_1024] >; Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -679,20 +679,27 @@ } if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass || - RC == &AMDGPU::SGPR_LO16RegClass) { + RC == &AMDGPU::SGPR_LO16RegClass || RC == &AMDGPU::AGPR_LO16RegClass) { assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) || AMDGPU::VGPR_HI16RegClass.contains(SrcReg) || - AMDGPU::SGPR_LO16RegClass.contains(SrcReg)); + AMDGPU::SGPR_LO16RegClass.contains(SrcReg) || + AMDGPU::AGPR_LO16RegClass.contains(SrcReg)); bool IsSGPRDst = AMDGPU::SGPR_LO16RegClass.contains(DestReg); bool IsSGPRSrc = AMDGPU::SGPR_LO16RegClass.contains(SrcReg); + bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg); + bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg); bool DstLow = (RC == &AMDGPU::VGPR_LO16RegClass || - RC == &AMDGPU::SGPR_LO16RegClass); + RC == &AMDGPU::SGPR_LO16RegClass || + RC == &AMDGPU::AGPR_LO16RegClass); bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) || - AMDGPU::SGPR_LO16RegClass.contains(SrcReg); + AMDGPU::SGPR_LO16RegClass.contains(SrcReg) || + AMDGPU::AGPR_LO16RegClass.contains(SrcReg); const TargetRegisterClass *DstRC = IsSGPRDst ? &AMDGPU::SGPR_32RegClass + : IsAGPRDst ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass; const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SGPR_32RegClass + : IsAGPRSrc ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass; MCRegister NewDestReg = RI.getMatchingSuperReg(DestReg, DstLow ? AMDGPU::lo16 : AMDGPU::hi16, @@ -712,6 +719,16 @@ return; } + if (IsAGPRDst || IsAGPRSrc) { + if (!DstLow || !SrcLow) { + reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc, + "Cannot use hi16 subreg with an AGPR!"); + } + + copyPhysReg(MBB, MI, DL, NewDestReg, NewSrcReg, KillSrc); + return; + } + if (IsSGPRSrc && !ST.hasSDWAScalar()) { if (!DstLow || !SrcLow) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc, Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -270,6 +270,10 @@ Reserved.set(Low); } + for (auto Reg : AMDGPU::AGPR_32RegClass) { + Reserved.set(getSubReg(Reg, AMDGPU::hi16)); + } + // Reserve all the rest AGPRs if there are no instructions to use it. if (!ST.hasMAIInsts()) { for (unsigned i = 0; i < MaxNumVGPRs; ++i) { @@ -1312,6 +1316,8 @@ const TargetRegisterClass * SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) { switch (BitWidth) { + case 16: + return &AMDGPU::AGPR_LO16RegClass; case 32: return &AMDGPU::AGPR_32RegClass; case 64: @@ -1361,6 +1367,7 @@ &AMDGPU::VGPR_LO16RegClass, &AMDGPU::VGPR_HI16RegClass, &AMDGPU::SGPR_LO16RegClass, + &AMDGPU::AGPR_LO16RegClass, &AMDGPU::VGPR_32RegClass, &AMDGPU::SReg_32RegClass, &AMDGPU::AGPR_32RegClass, @@ -1413,7 +1420,7 @@ bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const { unsigned Size = getRegSizeInBits(*RC); - if (Size < 32) + if (Size < 16) return false; const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size); if (!ARC) { Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -284,11 +284,9 @@ // AccVGPR registers foreach Index = 0-255 in { - def AGPR#Index : - SIReg <"a"#Index, Index>, - DwarfRegNum<[!add(Index, 3072), !add(Index, 2048)]> { - let HWEncoding{8} = 1; - } + defm AGPR#Index : + SIRegLoHi16 <"a"#Index, Index, 1, 1>, + DwarfRegNum<[!add(Index, 3072), !add(Index, 2048)]>; } //===----------------------------------------------------------------------===// @@ -497,6 +495,13 @@ // VGPR 1024-bit registers def VGPR_1024 : SIRegisterTuples.ret, VGPR_32, 255, 1, 32, "v">; +def AGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16, + (add (sequence "AGPR%u_LO16", 0, 255))> { + let isAllocatable = 0; + let Size = 16; + let GeneratePressureSet = 0; +} + // AccVGPR 32-bit registers def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add (sequence "AGPR%u", 0, 255))> { Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1083,6 +1083,11 @@ // (move from MC* level to Target* level). Return size in bits. unsigned getRegBitWidth(unsigned RCID) { switch (RCID) { + case AMDGPU::VGPR_LO16RegClassID: + case AMDGPU::VGPR_HI16RegClassID: + case AMDGPU::SGPR_LO16RegClassID: + case AMDGPU::AGPR_LO16RegClassID: + return 16; case AMDGPU::SGPR_32RegClassID: case AMDGPU::VGPR_32RegClassID: case AMDGPU::VRegOrLds_32RegClassID: Index: llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir +++ llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir @@ -29,3 +29,27 @@ $vgpr1_hi16 = COPY killed $sgpr0_lo16 S_ENDPGM 0 ... + +# GCN-LABEL: {{^}}lo_to_lo_illegal_agpr_to_sgpr: +# GCN: ; illegal copy a0.l to s1.l +# ERR: error: :0:0: in function lo_to_lo_illegal_agpr_to_sgpr void (): illegal SGPR to VGPR copy +name: lo_to_lo_illegal_agpr_to_sgpr +tracksRegLiveness: true +body: | + bb.0: + $agpr0 = IMPLICIT_DEF + $sgpr1_lo16 = COPY $agpr0_lo16 + S_ENDPGM 0 +... + +# GCN-LABEL: {{^}}lo_to_hi_vgpr_to_agpr: +# GCN: ; illegal copy v0.h to a1.l +# ERR: error: :0:0: in function lo_to_hi_vgpr_to_agpr void (): Cannot use hi16 subreg with an AGPR! +name: lo_to_hi_vgpr_to_agpr +tracksRegLiveness: true +body: | + bb.0: + $vgpr0 = IMPLICIT_DEF + $agpr1_lo16 = COPY killed $vgpr0_hi16 + S_ENDPGM 0 +... Index: llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-agpr.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-agpr.mir @@ -0,0 +1,52 @@ +# RUN: llc -march=amdgcn -mcpu=gfx908 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +# GCN-LABEL: {{^}}lo_to_lo_agpr_to_agpr: +# GCN: v_accvgpr_read_b32 [[TMP:v[0-9]+]], a0 +# GCN-NEXT: s_nop 1 +# GCN-NEXT: v_accvgpr_write_b32 a1, [[TMP]] +name: lo_to_lo_agpr_to_agpr +tracksRegLiveness: true +body: | + bb.0: + $agpr0 = IMPLICIT_DEF + $agpr1_lo16 = COPY $agpr0_lo16 + S_ENDPGM 0 +... + +# GCN-LABEL: {{^}}lo_to_lo_samereg: +# GCN: s_waitcnt +# GCN-NEXT: s_endpgm +name: lo_to_lo_samereg +tracksRegLiveness: true +body: | + bb.0: + $agpr0 = IMPLICIT_DEF + $agpr0_lo16 = COPY $agpr0_lo16 + S_ENDPGM 0 +... + +# GCN-LABEL: {{^}}lo_to_lo_undef_agpr_to_agpr: +# GCN: v_accvgpr_read_b32 [[TMP:v[0-9]+]], a1 +# GCN-NEXT: s_nop 1 +# GCN-NEXT: v_accvgpr_write_b32 a2, [[TMP]] +name: lo_to_lo_undef_agpr_to_agpr +tracksRegLiveness: true +body: | + bb.0: + $agpr1_lo16 = COPY undef $agpr0_lo16 + $agpr2 = COPY killed $agpr1 + S_ENDPGM 0 +... + +# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_agpr: +# GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], s0 +# GCN-NEXT: s_nop 1 +# GCN-NEXT: v_accvgpr_write_b32 a1, [[TMP]] +name: lo_to_lo_sgpr_to_agpr +tracksRegLiveness: true +body: | + bb.0: + $sgpr0 = IMPLICIT_DEF + $agpr1_lo16 = COPY $sgpr0_lo16 + S_ENDPGM 0 +... Index: llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir +++ llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir @@ -33,7 +33,7 @@ ; CHECK: dead %9:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; CHECK: undef %11.sub1:vreg_512 = COPY [[COPY]].sub1 - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def dead [[COPY1]], 851978 /* regdef:SGPR_LO16 */, def dead [[COPY]].sub1, 2147483657 /* reguse tiedto:$0 */, [[COPY1]], 2147549193 /* reguse tiedto:$1 */, [[COPY]].sub1 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_HI16 */, def dead [[COPY1]], 851978 /* regdef:SGPR_HI16 */, def dead [[COPY]].sub1, 2147483657 /* reguse tiedto:$0 */, [[COPY1]], 2147549193 /* reguse tiedto:$1 */, [[COPY]].sub1 ; CHECK: %11.sub0:vreg_512 = COPY [[COPY]].sub0 ; CHECK: %11.sub3:vreg_512 = COPY [[COPY]].sub3 ; CHECK: dead %10:vgpr_32 = V_ADD_I32_e32 4, [[V_MOV_B32_e32_1]], implicit-def dead $vcc, implicit $exec Index: llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir +++ llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir @@ -36,18 +36,18 @@ ; CHECK: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def dead %11 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_HI16 */, def dead %11 ; CHECK: GLOBAL_STORE_DWORD undef %12:vreg_64, [[BUFFER_LOAD_DWORD_OFFEN]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; CHECK: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3) - ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %15, 851978 /* regdef:SGPR_LO16 */, def %16 + ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_HI16 */, def %15, 851978 /* regdef:SGPR_HI16 */, def %16 ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec ; CHECK: [[DS_READ_B32_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec ; CHECK: [[DS_READ_B32_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec - ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %21, 851978 /* regdef:SGPR_LO16 */, def %22 + ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_HI16 */, def %21, 851978 /* regdef:SGPR_HI16 */, def %22 ; CHECK: [[DS_READ_B32_gfx9_3:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def dead [[V_MOV_B32_e32_2]], 851978 /* regdef:SGPR_LO16 */, def dead [[V_MOV_B32_e32_3]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B64_gfx9_]].sub0, 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193 /* reguse tiedto:$1 */, [[V_MOV_B32_e32_3]](tied-def 5), 851977 /* reguse:SGPR_LO16 */, %15, 851977 /* reguse:SGPR_LO16 */, %16, 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_1]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_3]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_2]] + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_HI16 */, def dead [[V_MOV_B32_e32_2]], 851978 /* regdef:SGPR_HI16 */, def dead [[V_MOV_B32_e32_3]], 851977 /* reguse:SGPR_HI16 */, [[DS_READ_B64_gfx9_]].sub0, 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193 /* reguse tiedto:$1 */, [[V_MOV_B32_e32_3]](tied-def 5), 851977 /* reguse:SGPR_HI16 */, %15, 851977 /* reguse:SGPR_HI16 */, %16, 851977 /* reguse:SGPR_HI16 */, [[DS_READ_B32_gfx9_1]], 851977 /* reguse:SGPR_HI16 */, [[DS_READ_B32_gfx9_]], 851977 /* reguse:SGPR_HI16 */, [[DS_READ_B32_gfx9_3]], 851977 /* reguse:SGPR_HI16 */, [[DS_READ_B32_gfx9_2]] ; CHECK: %5.sub1:vreg_64 = COPY [[V_MOV_B32_e32_]] ; CHECK: DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3) ; CHECK: DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3) Index: llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir +++ llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir @@ -25,9 +25,9 @@ ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_]] - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub0, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub1 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_HI16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SGPR_HI16 */, [[DS_READ_B32_gfx9_]] + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_HI16 */, def undef %0.sub0, 851978 /* regdef:SGPR_HI16 */, def undef %0.sub1 ; CHECK: S_NOP 0, implicit %0.sub1 ; CHECK: $sgpr10 = S_MOV_B32 -1 ; CHECK: S_BRANCH %bb.1 @@ -63,9 +63,9 @@ ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_]] - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub1, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub0 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_HI16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SGPR_HI16 */, [[DS_READ_B32_gfx9_]] + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_HI16 */, def undef %0.sub1, 851978 /* regdef:SGPR_HI16 */, def undef %0.sub0 ; CHECK: S_NOP 0, implicit %0.sub1 ; CHECK: $sgpr10 = S_MOV_B32 -1 ; CHECK: S_BRANCH %bb.1