diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1546,6 +1546,7 @@ bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); bool validateAGPRLdSt(const MCInst &Inst) const; bool validateVGPRAlign(const MCInst &Inst) const; + bool validateGWS(const MCInst &Inst, const OperandVector &Operands); bool validateDivScale(const MCInst &Inst); bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, const SMLoc &IDLoc); @@ -4108,6 +4109,34 @@ return true; } +// gfx90a has an undocumented limitation: +// DS_GWS opcodes must use even aligned registers. +bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, + const OperandVector &Operands) { + if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) + return true; + + int Opc = Inst.getOpcode(); + if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && + Opc != AMDGPU::DS_GWS_SEMA_BR_vi) + return true; + + const MCRegisterInfo *MRI = getMRI(); + const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); + int Data0Pos = + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); + assert(Data0Pos != -1); + auto Reg = Inst.getOperand(Data0Pos).getReg(); + auto RegIdx = Reg - (VGRP32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); + if (RegIdx & 1) { + SMLoc RegLoc = getRegLoc(Reg, Operands); + Error(RegLoc, "vgpr must be even aligned"); + return false; + } + + return true; +} + bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, const SMLoc &IDLoc) { @@ -4251,6 +4280,9 @@ "invalid register class: vgpr tuples must be 64 bit aligned"); return false; } + if (!validateGWS(Inst, Operands)) { + return false; + } if (!validateDivScale(Inst)) { Error(IDLoc, "ABS not allowed in VOP3B instructions"); diff --git a/llvm/test/MC/AMDGPU/gfx90a_err.s b/llvm/test/MC/AMDGPU/gfx90a_err.s --- a/llvm/test/MC/AMDGPU/gfx90a_err.s +++ b/llvm/test/MC/AMDGPU/gfx90a_err.s @@ -260,3 +260,21 @@ v_add_f32 v5, v1, lds_direct // GFX90A: error: lds_direct is not supported on this GPU + +ds_gws_init a1 offset:65535 gds +// GFX90A: error: vgpr must be even aligned + +ds_gws_init a255 offset:65535 gds +// GFX90A: error: vgpr must be even aligned + +ds_gws_sema_br v1 offset:65535 gds +// GFX90A: error: vgpr must be even aligned + +ds_gws_sema_br v255 offset:65535 gds +// GFX90A: error: vgpr must be even aligned + +ds_gws_barrier a3 offset:4 gds +// GFX90A: error: vgpr must be even aligned + +ds_gws_barrier a255 offset:4 gds +// GFX90A: error: vgpr must be even aligned diff --git a/llvm/test/MC/AMDGPU/gfx90a_err_pos.s b/llvm/test/MC/AMDGPU/gfx90a_err_pos.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx90a_err_pos.s @@ -0,0 +1,9 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck %s --implicit-check-not=error: --strict-whitespace + +//============================================================================== +// vgpr must be even aligned + +ds_gws_init a1 offset:65535 gds +// CHECK: error: vgpr must be even aligned +// CHECK-NEXT:{{^}}ds_gws_init a1 offset:65535 gds +// CHECK-NEXT:{{^}} ^ diff --git a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s --- a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s +++ b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s @@ -10225,65 +10225,65 @@ // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3] offset:65535 gds -// GFX90A: ds_gws_init a1 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x01,0x00,0x00,0x00] +// GFX90A: ds_gws_init a0 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x00,0x00,0x00,0x00] // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU -ds_gws_init a1 offset:65535 gds +ds_gws_init a0 offset:65535 gds -// GFX90A: ds_gws_init a255 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xff,0x00,0x00,0x00] +// GFX90A: ds_gws_init a254 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xfe,0x00,0x00,0x00] // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU -ds_gws_init a255 offset:65535 gds +ds_gws_init a254 offset:65535 gds -// GFX90A: ds_gws_init a1 gds ; encoding: [0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00] +// GFX90A: ds_gws_init a2 gds ; encoding: [0x00,0x00,0x33,0xdb,0x02,0x00,0x00,0x00] // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU -ds_gws_init a1 gds +ds_gws_init a2 gds -// GFX90A: ds_gws_init a1 gds ; encoding: [0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00] +// GFX90A: ds_gws_init a0 gds ; encoding: [0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00] // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU -ds_gws_init a1 gds +ds_gws_init a0 gds -// GFX90A: ds_gws_init a1 offset:4 gds ; encoding: [0x04,0x00,0x33,0xdb,0x01,0x00,0x00,0x00] +// GFX90A: ds_gws_init a0 offset:4 gds ; encoding: [0x04,0x00,0x33,0xdb,0x00,0x00,0x00,0x00] // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU -ds_gws_init a1 offset:4 gds +ds_gws_init a0 offset:4 gds -// GFX90A: ds_gws_sema_br a1 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x01,0x00,0x00,0x00] +// GFX90A: ds_gws_sema_br a2 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x02,0x00,0x00,0x00] // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU -ds_gws_sema_br a1 offset:65535 gds +ds_gws_sema_br a2 offset:65535 gds -// GFX90A: ds_gws_sema_br a255 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xff,0x00,0x00,0x00] +// GFX90A: ds_gws_sema_br a254 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xfe,0x00,0x00,0x00] // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU -ds_gws_sema_br a255 offset:65535 gds +ds_gws_sema_br a254 offset:65535 gds -// GFX90A: ds_gws_sema_br a1 gds ; encoding: [0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00] +// GFX90A: ds_gws_sema_br a0 gds ; encoding: [0x00,0x00,0x37,0xdb,0x00,0x00,0x00,0x00] // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU -ds_gws_sema_br a1 gds +ds_gws_sema_br a0 gds -// GFX90A: ds_gws_sema_br a1 gds ; encoding: [0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00] +// GFX90A: ds_gws_sema_br a2 gds ; encoding: [0x00,0x00,0x37,0xdb,0x02,0x00,0x00,0x00] // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU -ds_gws_sema_br a1 gds +ds_gws_sema_br a2 gds -// GFX90A: ds_gws_sema_br a1 offset:4 gds ; encoding: [0x04,0x00,0x37,0xdb,0x01,0x00,0x00,0x00] +// GFX90A: ds_gws_sema_br a0 offset:4 gds ; encoding: [0x04,0x00,0x37,0xdb,0x00,0x00,0x00,0x00] // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU -ds_gws_sema_br a1 offset:4 gds +ds_gws_sema_br a0 offset:4 gds -// GFX90A: ds_gws_barrier a1 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x01,0x00,0x00,0x00] +// GFX90A: ds_gws_barrier a2 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x02,0x00,0x00,0x00] // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU -ds_gws_barrier a1 offset:65535 gds +ds_gws_barrier a2 offset:65535 gds -// GFX90A: ds_gws_barrier a255 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xff,0x00,0x00,0x00] +// GFX90A: ds_gws_barrier a254 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xfe,0x00,0x00,0x00] // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU -ds_gws_barrier a255 offset:65535 gds +ds_gws_barrier a254 offset:65535 gds -// GFX90A: ds_gws_barrier a1 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00] +// GFX90A: ds_gws_barrier a0 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00] // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU -ds_gws_barrier a1 gds +ds_gws_barrier a0 gds -// GFX90A: ds_gws_barrier a1 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00] +// GFX90A: ds_gws_barrier a2 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00] // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU -ds_gws_barrier a1 gds +ds_gws_barrier a2 gds -// GFX90A: ds_gws_barrier a1 offset:4 gds ; encoding: [0x04,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00] +// GFX90A: ds_gws_barrier a0 offset:4 gds ; encoding: [0x04,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00] // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU -ds_gws_barrier a1 offset:4 gds +ds_gws_barrier a0 offset:4 gds // GFX90A: ds_consume a5 offset:65535 ; encoding: [0xff,0xff,0x7a,0xdb,0x00,0x00,0x00,0x05] // NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx90a_ldst_acc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx90a_ldst_acc.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx90a_ldst_acc.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx90a_ldst_acc.txt @@ -7668,50 +7668,50 @@ # GFX90A: ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3] offset:65535 gds ; encoding: [0xff,0xff,0xfd,0xda,0x01,0x02,0x00,0x06] 0xff,0xff,0xfd,0xda,0x01,0x02,0x00,0x06 -# GFX90A: ds_gws_init a1 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x01,0x00,0x00,0x00] -0xff,0xff,0x33,0xdb,0x01,0x00,0x00,0x00 +# GFX90A: ds_gws_init a2 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x02,0x00,0x00,0x00] +0xff,0xff,0x33,0xdb,0x02,0x00,0x00,0x00 -# GFX90A: ds_gws_init a255 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xff,0x00,0x00,0x00] -0xff,0xff,0x33,0xdb,0xff,0x00,0x00,0x00 +# GFX90A: ds_gws_init a254 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xfe,0x00,0x00,0x00] +0xff,0xff,0x33,0xdb,0xfe,0x00,0x00,0x00 -# GFX90A: ds_gws_init a1 gds ; encoding: [0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00] -0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00 +# GFX90A: ds_gws_init a0 gds ; encoding: [0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00] +0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00 -# GFX90A: ds_gws_init a1 gds ; encoding: [0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00] -0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00 +# GFX90A: ds_gws_init a0 gds ; encoding: [0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00] +0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00 -# GFX90A: ds_gws_init a1 offset:4 gds ; encoding: [0x04,0x00,0x33,0xdb,0x01,0x00,0x00,0x00] -0x04,0x00,0x33,0xdb,0x01,0x00,0x00,0x00 +# GFX90A: ds_gws_init a2 offset:4 gds ; encoding: [0x04,0x00,0x33,0xdb,0x02,0x00,0x00,0x00] +0x04,0x00,0x33,0xdb,0x02,0x00,0x00,0x00 -# GFX90A: ds_gws_sema_br a1 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x01,0x00,0x00,0x00] -0xff,0xff,0x37,0xdb,0x01,0x00,0x00,0x00 +# GFX90A: ds_gws_sema_br a2 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x02,0x00,0x00,0x00] +0xff,0xff,0x37,0xdb,0x02,0x00,0x00,0x00 -# GFX90A: ds_gws_sema_br a255 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xff,0x00,0x00,0x00] -0xff,0xff,0x37,0xdb,0xff,0x00,0x00,0x00 +# GFX90A: ds_gws_sema_br a254 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xfe,0x00,0x00,0x00] +0xff,0xff,0x37,0xdb,0xfe,0x00,0x00,0x00 -# GFX90A: ds_gws_sema_br a1 gds ; encoding: [0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00] -0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00 +# GFX90A: ds_gws_sema_br a0 gds ; encoding: [0x00,0x00,0x37,0xdb,0x00,0x00,0x00,0x00] +0x00,0x00,0x37,0xdb,0x00,0x00,0x00,0x00 -# GFX90A: ds_gws_sema_br a1 gds ; encoding: [0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00] -0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00 +# GFX90A: ds_gws_sema_br a2 gds ; encoding: [0x00,0x00,0x37,0xdb,0x02,0x00,0x00,0x00] +0x00,0x00,0x37,0xdb,0x02,0x00,0x00,0x00 -# GFX90A: ds_gws_sema_br a1 offset:4 gds ; encoding: [0x04,0x00,0x37,0xdb,0x01,0x00,0x00,0x00] -0x04,0x00,0x37,0xdb,0x01,0x00,0x00,0x00 +# GFX90A: ds_gws_sema_br a0 offset:4 gds ; encoding: [0x04,0x00,0x37,0xdb,0x00,0x00,0x00,0x00] +0x04,0x00,0x37,0xdb,0x00,0x00,0x00,0x00 -# GFX90A: ds_gws_barrier a1 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x01,0x00,0x00,0x00] -0xff,0xff,0x3b,0xdb,0x01,0x00,0x00,0x00 +# GFX90A: ds_gws_barrier a2 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x02,0x00,0x00,0x00] +0xff,0xff,0x3b,0xdb,0x02,0x00,0x00,0x00 -# GFX90A: ds_gws_barrier a255 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xff,0x00,0x00,0x00] -0xff,0xff,0x3b,0xdb,0xff,0x00,0x00,0x00 +# GFX90A: ds_gws_barrier a254 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xfe,0x00,0x00,0x00] +0xff,0xff,0x3b,0xdb,0xfe,0x00,0x00,0x00 -# GFX90A: ds_gws_barrier a1 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00] -0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00 +# GFX90A: ds_gws_barrier a0 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00] +0x00,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00 -# GFX90A: ds_gws_barrier a1 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00] -0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00 +# GFX90A: ds_gws_barrier a2 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00] +0x00,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00 -# GFX90A: ds_gws_barrier a1 offset:4 gds ; encoding: [0x04,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00] -0x04,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00 +# GFX90A: ds_gws_barrier a2 offset:4 gds ; encoding: [0x04,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00] +0x04,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00 # GFX90A: ds_consume a5 offset:65535 ; encoding: [0xff,0xff,0x7a,0xdb,0x00,0x00,0x00,0x05] 0xff,0xff,0x7a,0xdb,0x00,0x00,0x00,0x05