Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -263,6 +263,13 @@ reserveRegisterTuples(Reserved, Reg); } + for (auto Reg : AMDGPU::SReg_32RegClass) { + Reserved.set(getSubReg(Reg, AMDGPU::hi16)); + Register Low = getSubReg(Reg, AMDGPU::lo16); + if (!AMDGPU::SGPR_LO16RegClass.contains(Low)) + Reserved.set(Low); + } + // Reserve all the rest AGPRs if there are no instructions to use it. if (!ST.hasMAIInsts()) { for (unsigned i = 0; i < MaxNumVGPRs; ++i) { Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -123,25 +123,41 @@ class SIReg regIdx = 0> : Register { let Namespace = "AMDGPU"; - - // This is the not yet the complete register encoding. An additional - // bit is set for VGPRs. let HWEncoding = regIdx; } -class SIRegWithSubRegs subregs, bits<16> regIdx = 0> : +class SIRegWithSubRegs subregs, bits<16> regIdx> : RegisterWithSubRegs { - let Namespace = "AMDGPU"; +} - // This is the not yet the complete register encoding. An additional - // bit is set for VGPRs. - let HWEncoding = regIdx; - let CoveredBySubRegs = 1; +multiclass SIRegLoHi16 regIdx, bit ArtificialHigh = 1, + bit HWEncodingHigh = 0> { + // There is no special encoding for 16-bit subregs; these are not real + // registers but rather operands for instructions preserving other 16 bits + // of the result or reading just 16 bits of a 32-bit VGPR. + // Each is encoded as the corresponding 32-bit register. + // Non-VGPR register classes use them as we need to have matching subregisters + // to move instructions and data between ALUs. 
+ def _LO16 : SIReg { + let HWEncoding{8} = HWEncodingHigh; + } + def _HI16 : SIReg { + let isArtificial = ArtificialHigh; + let HWEncoding{8} = HWEncodingHigh; + } + def "" : RegisterWithSubRegs(NAME#"_LO16"), + !cast(NAME#"_HI16")]> { + let Namespace = "AMDGPU"; + let SubRegIndices = [lo16, hi16]; + let CoveredBySubRegs = 1; + let HWEncoding = regIdx; + let HWEncoding{8} = HWEncodingHigh; + } } // Special Registers -def VCC_LO : SIReg<"vcc_lo", 106>; -def VCC_HI : SIReg<"vcc_hi", 107>; +defm VCC_LO : SIRegLoHi16<"vcc_lo", 106>; +defm VCC_HI : SIRegLoHi16<"vcc_hi", 107>; // Pseudo-registers: Used as placeholders during isel and immediately // replaced, never seeing the verifier. @@ -164,8 +180,8 @@ let HWEncoding = 106; } -def EXEC_LO : SIReg<"exec_lo", 126>, DwarfRegNum<[1, 1]>; -def EXEC_HI : SIReg<"exec_hi", 127>; +defm EXEC_LO : SIRegLoHi16<"exec_lo", 126>, DwarfRegNum<[1, 1]>; +defm EXEC_HI : SIRegLoHi16<"exec_hi", 127>; def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>, DwarfRegNum<[17, 1]> { let Namespace = "AMDGPU"; @@ -175,22 +191,22 @@ // 32-bit real registers, for MC only. // May be used with both 32-bit and 64-bit operands. -def SRC_VCCZ : SIReg<"src_vccz", 251>; -def SRC_EXECZ : SIReg<"src_execz", 252>; -def SRC_SCC : SIReg<"src_scc", 253>; +defm SRC_VCCZ : SIRegLoHi16<"src_vccz", 251>; +defm SRC_EXECZ : SIRegLoHi16<"src_execz", 252>; +defm SRC_SCC : SIRegLoHi16<"src_scc", 253>; // 1-bit pseudo register, for codegen only. // Should never be emitted. 
def SCC : SIReg<"scc">; -def M0 : SIReg <"m0", 124>; -def SGPR_NULL : SIReg<"null", 125>; +defm M0 : SIRegLoHi16 <"m0", 124>; +defm SGPR_NULL : SIRegLoHi16 <"null", 125>; -def SRC_SHARED_BASE : SIReg<"src_shared_base", 235>; -def SRC_SHARED_LIMIT : SIReg<"src_shared_limit", 236>; -def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>; -def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>; -def SRC_POPS_EXITING_WAVE_ID : SIReg<"src_pops_exiting_wave_id", 239>; +defm SRC_SHARED_BASE : SIRegLoHi16<"src_shared_base", 235>; +defm SRC_SHARED_LIMIT : SIRegLoHi16<"src_shared_limit", 236>; +defm SRC_PRIVATE_BASE : SIRegLoHi16<"src_private_base", 237>; +defm SRC_PRIVATE_LIMIT : SIRegLoHi16<"src_private_limit", 238>; +defm SRC_POPS_EXITING_WAVE_ID : SIRegLoHi16<"src_pops_exiting_wave_id", 239>; def LDS_DIRECT : SIReg <"src_lds_direct", 254> { // There is no physical register corresponding to this. This is an @@ -199,8 +215,8 @@ let isArtificial = 1; } -def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>; -def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>; +defm XNACK_MASK_LO : SIRegLoHi16<"xnack_mask_lo", 104>; +defm XNACK_MASK_HI : SIRegLoHi16<"xnack_mask_hi", 105>; def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]> { @@ -210,8 +226,8 @@ } // Trap handler registers -def TBA_LO : SIReg<"tba_lo", 108>; -def TBA_HI : SIReg<"tba_hi", 109>; +defm TBA_LO : SIRegLoHi16<"tba_lo", 108>; +defm TBA_HI : SIRegLoHi16<"tba_hi", 109>; def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]> { let Namespace = "AMDGPU"; @@ -219,8 +235,8 @@ let HWEncoding = 108; } -def TMA_LO : SIReg<"tma_lo", 110>; -def TMA_HI : SIReg<"tma_hi", 111>; +defm TMA_LO : SIRegLoHi16<"tma_lo", 110>; +defm TMA_HI : SIRegLoHi16<"tma_hi", 111>; def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]> { let Namespace = "AMDGPU"; @@ -229,15 +245,15 @@ } foreach Index = 0-15 in { - def TTMP#Index#_vi : SIReg<"ttmp"#Index, !add(112, Index)>; - def TTMP#Index#_gfx9_gfx10 : SIReg<"ttmp"#Index, 
!add(108, Index)>; - def TTMP#Index : SIReg<"ttmp"#Index, 0>; + defm TTMP#Index#_vi : SIRegLoHi16<"ttmp"#Index, !add(112, Index)>; + defm TTMP#Index#_gfx9_gfx10 : SIRegLoHi16<"ttmp"#Index, !add(108, Index)>; + defm TTMP#Index : SIRegLoHi16<"ttmp"#Index, 0>; } multiclass FLAT_SCR_LOHI_m ci_e, bits<16> vi_e> { - def _ci : SIReg; - def _vi : SIReg; - def "" : SIReg; + defm _ci : SIRegLoHi16; + defm _vi : SIRegLoHi16; + defm "" : SIRegLoHi16; } class FlatReg encoding> : @@ -256,50 +272,17 @@ // SGPR registers foreach Index = 0-105 in { - def SGPR#Index#_LO16 : SIReg <"s"#Index#".l", Index>, - DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)), - !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]>; - - // This is a placeholder to fill high lane in mask. - def SGPR#Index#_HI16 : SIReg <"", Index> { - let isArtificial = 1; - } - - def SGPR#Index : - SIRegWithSubRegs <"s"#Index, [!cast("SGPR"#Index#"_LO16"), - !cast("SGPR"#Index#"_HI16")], - Index>, - DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)), - !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]> { - let SubRegIndices = [lo16, hi16]; - } + defm SGPR#Index : + SIRegLoHi16 <"s"#Index, Index>, + DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)), + !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]>; } // VGPR registers foreach Index = 0-255 in { - // There is no special encoding for low 16 bit subreg, this not a real - // register but rather an operand for instructions preserving high 16 bits - // of the result or reading just low 16 bits of a 32 bit VGPR. - // It is encoded as a corresponding 32 bit register. 
- def VGPR#Index#_LO16 : SIReg <"v"#Index#".l", Index>, - DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]> { - let HWEncoding{8} = 1; - } - // There is no special encoding for low 16 bit subreg, this not a real - // register but rather an operand for instructions preserving low 16 bits - // of the result or reading just high 16 bits of a 32 bit VGPR. - // It is encoded as a corresponding 32 bit register. - def VGPR#Index#_HI16 : SIReg <"v"#Index#".h", Index>, - DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]> { - let HWEncoding{8} = 1; - } - def VGPR#Index : SIRegWithSubRegs <"v"#Index, - [!cast("VGPR"#Index#"_LO16"), !cast("VGPR"#Index#"_HI16")], - Index>, - DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]> { - let HWEncoding{8} = 1; - let SubRegIndices = [lo16, hi16]; - } + defm VGPR#Index : + SIRegLoHi16 <"v"#Index, Index, 0, 1>, + DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]>; } // AccVGPR registers Index: llvm/test/CodeGen/AMDGPU/ipra-regmask.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/ipra-regmask.ll +++ llvm/test/CodeGen/AMDGPU/ipra-regmask.ll @@ -24,13 +24,13 @@ ret void } -; CHECK-DAG: special_regs Clobbered Registers: $scc $m0 {{$}} +; CHECK-DAG: special_regs Clobbered Registers: $scc $m0 $m0_hi16 $m0_lo16 {{$}} define void @special_regs() #0 { call void asm sideeffect "", "~{m0},~{scc}"() #0 ret void } -; CHECK-DAG: vcc Clobbered Registers: $vcc $vcc_hi $vcc_lo {{$}} +; CHECK-DAG: vcc Clobbered Registers: $vcc $vcc_hi $vcc_lo $vcc_hi_hi16 $vcc_hi_lo16 $vcc_lo_hi16 $vcc_lo_lo16 {{$}} define void @vcc() #0 { call void asm sideeffect "", "~{vcc}"() #0 ret void