Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -663,6 +663,11 @@ case AMDGPU::FLAT_SCR_HI: continue; + case AMDGPU::XNACK_MASK: + case AMDGPU::XNACK_MASK_LO: + case AMDGPU::XNACK_MASK_HI: + llvm_unreachable("xnack_mask registers should not be used"); + case AMDGPU::TBA: case AMDGPU::TBA_LO: case AMDGPU::TBA_HI: Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -896,6 +896,10 @@ KernelScope.initialize(getContext()); } + bool hasXNACK() const { + return AMDGPU::hasXNACK(getSTI()); + } + bool isSI() const { return AMDGPU::isSI(getSTI()); } @@ -1521,12 +1525,15 @@ .Case("exec", AMDGPU::EXEC) .Case("vcc", AMDGPU::VCC) .Case("flat_scratch", AMDGPU::FLAT_SCR) + .Case("xnack_mask", AMDGPU::XNACK_MASK) .Case("m0", AMDGPU::M0) .Case("scc", AMDGPU::SCC) .Case("tba", AMDGPU::TBA) .Case("tma", AMDGPU::TMA) .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) + .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) + .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) .Case("vcc_lo", AMDGPU::VCC_LO) .Case("vcc_hi", AMDGPU::VCC_HI) .Case("exec_lo", AMDGPU::EXEC_LO) @@ -1564,6 +1571,11 @@ RegWidth = 2; return true; } + if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { + Reg = AMDGPU::XNACK_MASK; + RegWidth = 2; + return true; + } if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { Reg = AMDGPU::VCC; RegWidth = 2; @@ -2617,6 +2629,10 @@ case AMDGPU::TMA_LO: case AMDGPU::TMA_HI: return !isGFX9(); + case AMDGPU::XNACK_MASK: + case AMDGPU::XNACK_MASK_LO: + case AMDGPU::XNACK_MASK_HI: + return !isCI() && !isSI() && hasXNACK(); default: break; } Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp 
=================================================================== --- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -690,9 +690,8 @@ switch (Val) { case 102: return createRegOperand(FLAT_SCR_LO); case 103: return createRegOperand(FLAT_SCR_HI); - // ToDo: no support for xnack_mask_lo/_hi register - case 104: - case 105: break; + case 104: return createRegOperand(XNACK_MASK_LO); + case 105: return createRegOperand(XNACK_MASK_HI); case 106: return createRegOperand(VCC_LO); case 107: return createRegOperand(VCC_HI); case 108: assert(!isGFX9()); return createRegOperand(TBA_LO); @@ -722,6 +721,7 @@ switch (Val) { case 102: return createRegOperand(FLAT_SCR); + case 104: return createRegOperand(XNACK_MASK); case 106: return createRegOperand(VCC); case 108: assert(!isGFX9()); return createRegOperand(TBA); case 110: assert(!isGFX9()); return createRegOperand(TMA); Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -267,6 +267,9 @@ case AMDGPU::FLAT_SCR: O << "flat_scratch"; return; + case AMDGPU::XNACK_MASK: + O << "xnack_mask"; + return; case AMDGPU::VCC_LO: O << "vcc_lo"; return; @@ -297,6 +300,12 @@ case AMDGPU::FLAT_SCR_HI: O << "flat_scratch_hi"; return; + case AMDGPU::XNACK_MASK_LO: + O << "xnack_mask_lo"; + return; + case AMDGPU::XNACK_MASK_HI: + O << "xnack_mask_hi"; + return; case AMDGPU::FP_REG: case AMDGPU::SP_REG: case AMDGPU::SCRATCH_WAVE_OFFSET_REG: Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -163,6 +163,9 @@ reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE); reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT); + // Reserve xnack_mask 
registers - support is not implemented in Codegen. + reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK); + // Reserve Trap Handler registers - support is not implemented in Codegen. reserveRegisterTuples(Reserved, AMDGPU::TBA); reserveRegisterTuples(Reserved, AMDGPU::TMA); Index: lib/Target/AMDGPU/SIRegisterInfo.td =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.td +++ lib/Target/AMDGPU/SIRegisterInfo.td @@ -76,6 +76,16 @@ def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>; def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>; +def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>; +def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>; + +def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>, + DwarfRegAlias<XNACK_MASK_LO> { + let Namespace = "AMDGPU"; + let SubRegIndices = [sub0, sub1]; + let HWEncoding = 104; +} + // Trap handler registers def TBA_LO : SIReg<"tba_lo", 108>; def TBA_HI : SIReg<"tba_hi", 109>; @@ -403,7 +413,7 @@ // Subset of SReg_32 without M0 for SMRD instructions and alike. // See comments in SIInstructions.td for more info. 
def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, + (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> { let AllocationPriority = 7; @@ -435,7 +445,7 @@ } def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32, - (add SGPR_64, VCC, FLAT_SCR, TTMP_64, TBA, TMA)> { + (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> { let CopyCost = 1; let AllocationPriority = 8; } Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -278,6 +278,8 @@ } } +bool hasXNACK(const MCSubtargetInfo &STI); + bool isSI(const MCSubtargetInfo &STI); bool isCI(const MCSubtargetInfo &STI); bool isVI(const MCSubtargetInfo &STI); Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -598,6 +598,10 @@ } } +bool hasXNACK(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureXNACK]; +} + bool isSI(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands]; } Index: test/MC/AMDGPU/xnack-mask.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/xnack-mask.s @@ -0,0 +1,30 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICIVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICIVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICIVI %s + +// RUN: not llvm-mc -arch=amdgcn 
-mcpu=stoney -show-encoding %s 2>&1 | FileCheck -check-prefix=XNACKERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=stoney -show-encoding %s | FileCheck -check-prefix=XNACK %s + +s_mov_b64 xnack_mask, -1 +// NOSICIVI: error: not a valid operand. +// XNACK: s_mov_b64 xnack_mask, -1 ; encoding: [0xc1,0x01,0xe8,0xbe] + +s_mov_b32 xnack_mask_lo, -1 +// NOSICIVI: error: not a valid operand. +// XNACK: s_mov_b32 xnack_mask_lo, -1 ; encoding: [0xc1,0x00,0xe8,0xbe] + +s_mov_b32 xnack_mask_hi, -1 +// NOSICIVI: error: not a valid operand. +// XNACK: s_mov_b32 xnack_mask_hi, -1 ; encoding: [0xc1,0x00,0xe9,0xbe] + +s_mov_b32 xnack_mask, -1 +// NOSICIVI: error: not a valid operand. +// XNACKERR: error: invalid operand for instruction + +s_mov_b64 xnack_mask_lo, -1 +// NOSICIVI: error: not a valid operand. +// XNACKERR: error: invalid operand for instruction + +s_mov_b64 xnack_mask_hi, -1 +// NOSICIVI: error: not a valid operand. +// XNACKERR: error: invalid operand for instruction Index: test/MC/Disassembler/AMDGPU/sop1_vi.txt =================================================================== --- test/MC/Disassembler/AMDGPU/sop1_vi.txt +++ test/MC/Disassembler/AMDGPU/sop1_vi.txt @@ -15,12 +15,21 @@ # VI: s_mov_b32 s0, 0xfe5163ab ; encoding: [0xff,0x00,0x80,0xbe,0xab,0x63,0x51,0xfe] 0xff 0x00 0x80 0xbe 0xab 0x63 0x51 0xfe +# VI: s_mov_b32 xnack_mask_lo, -1 ; encoding: [0xc1,0x00,0xe8,0xbe] +0xc1,0x00,0xe8,0xbe + +# VI: s_mov_b32 xnack_mask_hi, -1 ; encoding: [0xc1,0x00,0xe9,0xbe] +0xc1,0x00,0xe9,0xbe + # VI: s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x01,0x82,0xbe] 0x04 0x01 0x82 0xbe -# FIXME: s_mov_b64 s[2:3], -1 ; encoding: [0xc1,0x01,0x82,0xbe] +# VI: s_mov_b64 s[2:3], -1 ; encoding: [0xc1,0x01,0x82,0xbe] 0xc1 0x01 0x82 0xbe +# VI: s_mov_b64 xnack_mask, -1 ; encoding: [0xc1,0x01,0xe8,0xbe] +0xc1,0x01,0xe8,0xbe + # VI: s_mov_b64 s[2:3], 0xffffffff ; encoding: [0xff,0x01,0x82,0xbe,0xff,0xff,0xff,0xff] 0xff 0x01 0x82 0xbe 0xff 0xff 0xff 0xff Index: 
test/MC/Disassembler/AMDGPU/vop3_gfx9.txt =================================================================== --- test/MC/Disassembler/AMDGPU/vop3_gfx9.txt +++ test/MC/Disassembler/AMDGPU/vop3_gfx9.txt @@ -210,6 +210,9 @@ # GFX9: v_mad_mix_f32 v5, flat_scratch_hi, v2, v3 ; encoding: [0x05,0x00,0xa0,0xd3,0x67,0x04,0x0e,0x04] 0x05,0x00,0xa0,0xd3,0x67,0x04,0x0e,0x04 +# GFX9: v_mad_mix_f32 v5, xnack_mask_hi, v2, v3 ; encoding: [0x05,0x00,0xa0,0xd3,0x69,0x04,0x0e,0x04] +0x05,0x00,0xa0,0xd3,0x69,0x04,0x0e,0x04 + # GFX9: v_mad_mix_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0xa0,0xd3,0x6a,0x04,0x0e,0x04] 0x05,0x00,0xa0,0xd3,0x6a,0x04,0x0e,0x04 @@ -665,3 +668,6 @@ # GFX9: v_interp_p2_legacy_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04] 0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04 + +# GFX9: v_add_f64 v[5:6], xnack_mask, v[2:3] ; encoding: [0x05,0x00,0x80,0xd2,0x68,0x04,0x02,0x00] +0x05,0x00,0x80,0xd2,0x68,0x04,0x02,0x00