Index: llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1386,10 +1386,15 @@
     return AMDGPU::isGFX9(getSTI());
   }
 
+  // TODO: isGFX90A is also true for GFX940. We need to clean it up.
   bool isGFX90A() const {
     return AMDGPU::isGFX90A(getSTI());
   }
 
+  bool isGFX940() const {
+    return AMDGPU::isGFX940(getSTI());
+  }
+
   bool isGFX9Plus() const {
     return AMDGPU::isGFX9Plus(getSTI());
   }
@@ -4256,7 +4261,7 @@
     return false;
   }
 
-  if (isGFX90A() && (CPol & CPol::SCC)) {
+  if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
     StringRef CStr(S.getPointer());
     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
@@ -4269,7 +4274,8 @@
 
   if (TSFlags & SIInstrFlags::IsAtomicRet) {
     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
-      Error(IDLoc, "instruction must use glc");
+      Error(IDLoc, isGFX940() ? "instruction must use sc0"
+                              : "instruction must use glc");
       return false;
     }
   } else {
@@ -4277,7 +4283,10 @@
       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
       StringRef CStr(S.getPointer());
-      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
-      Error(S, "instruction must not use glc");
+      // GFX940 writes this bit as "sc0"; searching for "glc" would miss it.
+      S = SMLoc::getFromPointer(
+          &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
+      Error(S, isGFX940() ? "instruction must not use sc0"
+                          : "instruction must not use glc");
       return false;
     }
   }
@@ -5626,7 +5635,26 @@
   unsigned CPolOff = 0;
   SMLoc S = getLoc();
 
-  if (trySkipId("glc"))
+  // On GFX940 every mnemonic except the scalar "s_" ones takes the
+  // sc0/nt/sc1 spellings; the older spellings are rejected for them.
+  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
+  if (isGFX940() && !Mnemo.startswith("s_")) {
+    if (trySkipId("sc0"))
+      CPolOn = AMDGPU::CPol::SC0;
+    else if (trySkipId("nosc0"))
+      CPolOff = AMDGPU::CPol::SC0;
+    else if (trySkipId("nt"))
+      CPolOn = AMDGPU::CPol::NT;
+    else if (trySkipId("nont"))
+      CPolOff = AMDGPU::CPol::NT;
+    else if (trySkipId("sc1"))
+      CPolOn = AMDGPU::CPol::SC1;
+    else if (trySkipId("nosc1"))
+      CPolOff = AMDGPU::CPol::SC1;
+    else
+      return MatchOperand_NoMatch;
+  }
+  else if (trySkipId("glc"))
     CPolOn = AMDGPU::CPol::GLC;
   else if (trySkipId("noglc"))
     CPolOff = AMDGPU::CPol::GLC;
Index: llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -206,13 +206,15 @@
                                   const MCSubtargetInfo &STI, raw_ostream &O) {
   auto Imm = MI->getOperand(OpNo).getImm();
   if (Imm & CPol::GLC)
-    O << " glc";
+    O << ((AMDGPU::isGFX940(STI) &&
+           !(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD)) ? " sc0"
+                                                                     : " glc");
   if (Imm & CPol::SLC)
-    O << " slc";
+    O << (AMDGPU::isGFX940(STI) ? " nt" : " slc");
   if ((Imm & CPol::DLC) && AMDGPU::isGFX10Plus(STI))
     O << " dlc";
   if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
-    O << " scc";
+    O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
   if (Imm & ~CPol::ALL)
     O << " /* unexpected cache policy bit */";
 }
Index: llvm/lib/Target/AMDGPU/SIDefines.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIDefines.h
+++ llvm/lib/Target/AMDGPU/SIDefines.h
@@ -294,6 +294,9 @@
   SLC = 2,
   DLC = 4,
   SCC = 16,
+  SC0 = GLC,
+  SC1 = SCC,
+  NT = SLC,
   ALL = GLC | SLC | DLC | SCC
 };
 
Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -780,6 +780,7 @@
 bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
 bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
 bool isGFX90A(const MCSubtargetInfo &STI);
+bool isGFX940(const MCSubtargetInfo &STI);
 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
 bool hasMAIInsts(const MCSubtargetInfo &STI);
 int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1530,6 +1530,10 @@
   return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts];
 }
 
+bool isGFX940(const MCSubtargetInfo &STI) {
+  return STI.getFeatureBits()[AMDGPU::FeatureGFX940Insts];
+}
+
 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
   return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
 }
Index: llvm/test/MC/AMDGPU/gfx940_asm_features.s
===================================================================
--- llvm/test/MC/AMDGPU/gfx940_asm_features.s
+++ llvm/test/MC/AMDGPU/gfx940_asm_features.s
@@ -1,5 +1,45 @@
 // RUN: llvm-mc -arch=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck --check-prefix=GFX940 --strict-whitespace %s
-// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=GFX90A --implicit-check-not=error: %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=NOT-GFX940,GFX90A --implicit-check-not=error: %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=NOT-GFX940 --implicit-check-not=error: %s
+
+// NOT-GFX940: error: invalid operand for instruction
+// GFX940: global_load_dword v2, v[2:3], off sc0 ; encoding: [0x00,0x80,0x51,0xdc,0x02,0x00,0x7f,0x02]
+global_load_dword v2, v[2:3], off sc0
+
+// NOT-GFX940: error: invalid operand for instruction
+// GFX940: global_load_dword v2, v[2:3], off ; encoding: [0x00,0x80,0x50,0xdc,0x02,0x00,0x7f,0x02]
+global_load_dword v2, v[2:3], off nosc0
+
+// NOT-GFX940: error: invalid operand for instruction
+// GFX940: global_load_dword v2, v[2:3], off sc1 ; encoding: [0x00,0x80,0x50,0xde,0x02,0x00,0x7f,0x02]
+global_load_dword v2, v[2:3], off sc1
+
+// NOT-GFX940: error: invalid operand for instruction
+// GFX940: global_load_dword v2, v[2:3], off ; encoding: [0x00,0x80,0x50,0xdc,0x02,0x00,0x7f,0x02]
+global_load_dword v2, v[2:3], off nosc1
+
+// NOT-GFX940: error: invalid operand for instruction
+// GFX940: global_load_dword v2, v[2:3], off nt ; encoding: [0x00,0x80,0x52,0xdc,0x02,0x00,0x7f,0x02]
+global_load_dword v2, v[2:3], off nt
+
+// NOT-GFX940: error: invalid operand for instruction
+// GFX940: global_load_dword v2, v[2:3], off ; encoding: [0x00,0x80,0x50,0xdc,0x02,0x00,0x7f,0x02]
+global_load_dword v2, v[2:3], off nont
+
+// GFX940: s_load_dword s2, s[2:3], 0x0 glc ; encoding: [0x81,0x00,0x03,0xc0,0x00,0x00,0x00,0x00]
+s_load_dword s2, s[2:3], 0x0 glc
+
+// NOT-GFX940: error: invalid operand for instruction
+// GFX940: buffer_load_dword v5, off, s[8:11], s3 sc0 nt sc1 ; encoding: [0x00,0xc0,0x52,0xe0,0x00,0x05,0x02,0x03]
+buffer_load_dword v5, off, s[8:11], s3 sc0 nt sc1
+
+// NOT-GFX940: error: invalid operand for instruction
+// GFX940: buffer_atomic_swap v5, off, s[8:11], s3 sc0 ; encoding: [0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03]
+buffer_atomic_swap v5, off, s[8:11], s3 sc0
+
+// NOT-GFX940: error: invalid operand for instruction
+// GFX940: buffer_atomic_swap v5, off, s[8:11], s3 nt ; encoding: [0x00,0x00,0x02,0xe1,0x00,0x05,0x02,0x03]
+buffer_atomic_swap v5, off, s[8:11], s3 nt
 
 // GFX90A: error: instruction not supported on this GPU
 // GFX940: v_fmamk_f32 v0, v2, 0x42c80000, v3 ; encoding: [0x02,0x07,0x00,0x2e,0x00,0x00,0xc8,0x42]
Index: llvm/test/MC/AMDGPU/gfx940_err.s
===================================================================
--- llvm/test/MC/AMDGPU/gfx940_err.s
+++ llvm/test/MC/AMDGPU/gfx940_err.s
@@ -15,3 +15,21 @@
 
 v_mad_legacy_f32 v0, v1, v2, v3
 // GFX940: error: instruction not supported on this GPU
+
+global_load_dword v2, v[2:3], off glc
+// GFX940: error: invalid operand for instruction
+
+global_load_dword v2, v[2:3], off slc
+// GFX940: error: invalid operand for instruction
+
+global_load_dword v2, v[2:3], off scc
+// GFX940: error: invalid operand for instruction
+
+s_load_dword s2, s[2:3], 0x0 sc0
+// GFX940: error: invalid operand for instruction
+
+buffer_atomic_swap v5, off, s[8:11], s3 glc
+// GFX940: error: invalid operand for instruction
+
+buffer_atomic_swap v5, off, s[8:11], s3 slc
+// GFX940: error: invalid operand for instruction
Index: llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
===================================================================
--- llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
+++ llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
@@ -1,5 +1,26 @@
 # RUN: llvm-mc -arch=amdgcn -mcpu=gfx940 -disassemble -show-encoding %s | FileCheck -strict-whitespace --check-prefix=GFX940 %s
 
+# GFX940: global_load_dword v2, v[2:3], off sc0 ; encoding: [0x00,0x80,0x51,0xdc,0x02,0x00,0x7f,0x02]
+0x00,0x80,0x51,0xdc,0x02,0x00,0x7f,0x02
+
+# GFX940: global_load_dword v2, v[2:3], off sc1 ; encoding: [0x00,0x80,0x50,0xde,0x02,0x00,0x7f,0x02]
+0x00,0x80,0x50,0xde,0x02,0x00,0x7f,0x02
+
+# GFX940: global_load_dword v2, v[2:3], off nt ; encoding: [0x00,0x80,0x52,0xdc,0x02,0x00,0x7f,0x02]
+0x00,0x80,0x52,0xdc,0x02,0x00,0x7f,0x02
+
+# GFX940: s_load_dword s2, s[2:3], 0x0 glc ; encoding: [0x81,0x00,0x03,0xc0,0x00,0x00,0x00,0x00]
+0x81,0x00,0x03,0xc0,0x00,0x00,0x00,0x00
+
+# GFX940: buffer_load_dword v5, off, s[8:11], s3 sc0 nt sc1 ; encoding: [0x00,0xc0,0x52,0xe0,0x00,0x05,0x02,0x03]
+0x00,0xc0,0x52,0xe0,0x00,0x05,0x02,0x03
+
+# GFX940: buffer_atomic_swap v5, off, s[8:11], s3 sc0 ; encoding: [0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03]
+0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03
+
+# GFX940: buffer_atomic_swap v5, off, s[8:11], s3 nt ; encoding: [0x00,0x00,0x02,0xe1,0x00,0x05,0x02,0x03]
+0x00,0x00,0x02,0xe1,0x00,0x05,0x02,0x03
+
 # GFX940: v_fmamk_f32 v0, v2, 0x42c80000, v3 ; encoding: [0x02,0x07,0x00,0x2e,0x00,0x00,0xc8,0x42]
 0x02,0x07,0x00,0x2e,0x00,0x00,0xc8,0x42
 