Index: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -415,6 +415,11 @@ return isSSrcF16(); } + bool isSSrcOrLdsB32() const { + return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || + isLiteralImm(MVT::i32) || isExpr(); + } + bool isVCSrcB32() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); } @@ -2477,6 +2482,73 @@ return true; } +static bool IsRevOpcode(const unsigned Opcode) +{ + switch (Opcode) { + case AMDGPU::V_SUBREV_F32_e32: + case AMDGPU::V_SUBREV_F32_e64: + case AMDGPU::V_SUBREV_F32_e32_si: + case AMDGPU::V_SUBREV_F32_e32_vi: + case AMDGPU::V_SUBREV_F32_e64_si: + case AMDGPU::V_SUBREV_F32_e64_vi: + case AMDGPU::V_SUBREV_I32_e32: + case AMDGPU::V_SUBREV_I32_e64: + case AMDGPU::V_SUBREV_I32_e32_si: + case AMDGPU::V_SUBREV_I32_e64_si: + case AMDGPU::V_SUBBREV_U32_e32: + case AMDGPU::V_SUBBREV_U32_e64: + case AMDGPU::V_SUBBREV_U32_e32_si: + case AMDGPU::V_SUBBREV_U32_e32_vi: + case AMDGPU::V_SUBBREV_U32_e64_si: + case AMDGPU::V_SUBBREV_U32_e64_vi: + case AMDGPU::V_SUBREV_U32_e32: + case AMDGPU::V_SUBREV_U32_e64: + case AMDGPU::V_SUBREV_U32_e32_gfx9: + case AMDGPU::V_SUBREV_U32_e32_vi: + case AMDGPU::V_SUBREV_U32_e64_gfx9: + case AMDGPU::V_SUBREV_U32_e64_vi: + case AMDGPU::V_SUBREV_F16_e32: + case AMDGPU::V_SUBREV_F16_e64: + case AMDGPU::V_SUBREV_F16_e32_vi: + case AMDGPU::V_SUBREV_F16_e64_vi: + case AMDGPU::V_SUBREV_U16_e32: + case AMDGPU::V_SUBREV_U16_e64: + case AMDGPU::V_SUBREV_U16_e32_vi: + case AMDGPU::V_SUBREV_U16_e64_vi: + case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: + case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: + case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: + case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: + case AMDGPU::V_LSHLREV_B32_e32_si: + case AMDGPU::V_LSHLREV_B32_e64_si: + case AMDGPU::V_LSHLREV_B16_e32_vi: + case 
AMDGPU::V_LSHLREV_B16_e64_vi: + case AMDGPU::V_LSHLREV_B32_e32_vi: + case AMDGPU::V_LSHLREV_B32_e64_vi: + case AMDGPU::V_LSHLREV_B64_vi: + case AMDGPU::V_LSHRREV_B32_e32_si: + case AMDGPU::V_LSHRREV_B32_e64_si: + case AMDGPU::V_LSHRREV_B16_e32_vi: + case AMDGPU::V_LSHRREV_B16_e64_vi: + case AMDGPU::V_LSHRREV_B32_e32_vi: + case AMDGPU::V_LSHRREV_B32_e64_vi: + case AMDGPU::V_LSHRREV_B64_vi: + case AMDGPU::V_ASHRREV_I32_e64_si: + case AMDGPU::V_ASHRREV_I32_e32_si: + case AMDGPU::V_ASHRREV_I16_e32_vi: + case AMDGPU::V_ASHRREV_I16_e64_vi: + case AMDGPU::V_ASHRREV_I32_e32_vi: + case AMDGPU::V_ASHRREV_I32_e64_vi: + case AMDGPU::V_ASHRREV_I64_vi: + case AMDGPU::V_PK_LSHLREV_B16_vi: + case AMDGPU::V_PK_LSHRREV_B16_vi: + case AMDGPU::V_PK_ASHRREV_I16_vi: + return true; + default: + return false; + } +} + bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { using namespace SIInstrFlags; @@ -2511,50 +2583,7 @@ return true; // lds_direct is specified as src0. Check additional limitations. - - // FIXME: This is a workaround for bug 37943 - // which allows 64-bit VOP3 opcodes use 32-bit operands. - if (AMDGPU::getRegOperandSize(getMRI(), Desc, Src0Idx) != 4) - return false; - - // Documentation does not disable lds_direct for SDWA, but SP3 assembler does. - // FIXME: This inconsistence needs to be investigated further. - if (Desc.TSFlags & SIInstrFlags::SDWA) - return false; - - // The following opcodes do not accept lds_direct which is explicitly stated - // in AMD documentation. However SP3 disables lds_direct for most other 'rev' - // opcodes as well (e.g. for v_subrev_u32 but not for v_subrev_f32). - // FIXME: This inconsistence needs to be investigated further. 
- switch (Opcode) { - case AMDGPU::V_LSHLREV_B32_e32_si: - case AMDGPU::V_LSHLREV_B32_e64_si: - case AMDGPU::V_LSHLREV_B16_e32_vi: - case AMDGPU::V_LSHLREV_B16_e64_vi: - case AMDGPU::V_LSHLREV_B32_e32_vi: - case AMDGPU::V_LSHLREV_B32_e64_vi: - case AMDGPU::V_LSHLREV_B64_vi: - case AMDGPU::V_LSHRREV_B32_e32_si: - case AMDGPU::V_LSHRREV_B32_e64_si: - case AMDGPU::V_LSHRREV_B16_e32_vi: - case AMDGPU::V_LSHRREV_B16_e64_vi: - case AMDGPU::V_LSHRREV_B32_e32_vi: - case AMDGPU::V_LSHRREV_B32_e64_vi: - case AMDGPU::V_LSHRREV_B64_vi: - case AMDGPU::V_ASHRREV_I32_e64_si: - case AMDGPU::V_ASHRREV_I32_e32_si: - case AMDGPU::V_ASHRREV_I16_e32_vi: - case AMDGPU::V_ASHRREV_I16_e64_vi: - case AMDGPU::V_ASHRREV_I32_e32_vi: - case AMDGPU::V_ASHRREV_I32_e64_vi: - case AMDGPU::V_ASHRREV_I64_vi: - case AMDGPU::V_PK_LSHLREV_B16_vi: - case AMDGPU::V_PK_LSHRREV_B16_vi: - case AMDGPU::V_PK_ASHRREV_I16_vi: - return false; - default: - return true; - } + return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); } bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { Index: llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -71,6 +71,8 @@ DecodeStatus convertMIMGInst(MCInst &MI) const; MCOperand decodeOperand_VGPR_32(unsigned Val) const; + MCOperand decodeOperand_VRegOrLds_32(unsigned Val) const; + MCOperand decodeOperand_VS_32(unsigned Val) const; MCOperand decodeOperand_VS_64(unsigned Val) const; MCOperand decodeOperand_VS_128(unsigned Val) const; @@ -84,6 +86,7 @@ MCOperand decodeOperand_SReg_32(unsigned Val) const; MCOperand decodeOperand_SReg_32_XM0_XEXEC(unsigned Val) const; MCOperand decodeOperand_SReg_32_XEXEC_HI(unsigned Val) const; + MCOperand decodeOperand_SRegOrLds_32(unsigned Val) const; MCOperand decodeOperand_SReg_64(unsigned Val) 
const; MCOperand decodeOperand_SReg_64_XEXEC(unsigned Val) const; MCOperand decodeOperand_SReg_128(unsigned Val) const; Index: llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -97,6 +97,7 @@ DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass) DECODE_OPERAND_REG(VGPR_32) +DECODE_OPERAND_REG(VRegOrLds_32) DECODE_OPERAND_REG(VS_32) DECODE_OPERAND_REG(VS_64) DECODE_OPERAND_REG(VS_128) @@ -108,6 +109,7 @@ DECODE_OPERAND_REG(SReg_32) DECODE_OPERAND_REG(SReg_32_XM0_XEXEC) DECODE_OPERAND_REG(SReg_32_XEXEC_HI) +DECODE_OPERAND_REG(SRegOrLds_32) DECODE_OPERAND_REG(SReg_64) DECODE_OPERAND_REG(SReg_64_XEXEC) DECODE_OPERAND_REG(SReg_128) @@ -469,6 +471,10 @@ return createRegOperand(AMDGPU::VGPR_32RegClassID, Val); } +MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const { + return decodeSrcOp(OPW32, Val); +} + MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const { return createRegOperand(AMDGPU::VReg_64RegClassID, Val); } @@ -500,6 +506,13 @@ return decodeOperand_SReg_32(Val); } +MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const { + // The TableGen-generated disassembler doesn't care about operand types, + // leaving only the register class, so SSrc_32 operands turn into SReg_32; + // therefore we accept immediates and literals here as well. + return decodeSrcOp(OPW32, Val); +} + MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const { return decodeSrcOp(OPW64, Val); } Index: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td +++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td @@ -442,6 +442,11 @@ let AllocationPriority = 7; } +def SRegOrLds_32 : 
RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, + (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS)> { + let isAllocatable = 0; +} + def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, (add SGPR_64Regs)> { let CopyCost = 1; let AllocationPriority = 8; @@ -511,6 +516,11 @@ let AllocationPriority = 12; } +def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, + (add VGPR_32, LDS_DIRECT_CLASS)> { + let isAllocatable = 0; +} + // Register class for all vector registers (VGPRs + Interploation Registers) def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32, (add VGPR_64)> { let Size = 64; @@ -631,6 +641,12 @@ defm SSrc : RegImmOperand<"SReg", "SSrc">; +def SSrcOrLds_b32 : RegisterOperand { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_REG_IMM_INT32"; + let ParserMatchClass = RegImmMatcher<"SSrcOrLdsB32">; +} + //===----------------------------------------------------------------------===// // SCSrc_* Operands with an SGPR or a inline constant //===----------------------------------------------------------------------===// Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -802,9 +802,11 @@ switch (RCID) { case AMDGPU::SGPR_32RegClassID: case AMDGPU::VGPR_32RegClassID: + case AMDGPU::VRegOrLds_32RegClassID: case AMDGPU::VS_32RegClassID: case AMDGPU::SReg_32RegClassID: case AMDGPU::SReg_32_XM0RegClassID: + case AMDGPU::SRegOrLds_32RegClassID: return 32; case AMDGPU::SGPR_64RegClassID: case AMDGPU::VS_64RegClassID: Index: llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td +++ 
llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td @@ -142,7 +142,7 @@ // TODO: Make profile for this, there is VOP3 encoding also def V_READFIRSTLANE_B32 : InstSI <(outs SReg_32:$vdst), - (ins VGPR_32:$src0), + (ins VRegOrLds_32:$src0), "v_readfirstlane_b32 $vdst, $src0", [(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]>, Enc32 { Index: llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td @@ -360,7 +360,7 @@ def VOP_READLANE : VOPProfile<[i32, i32, i32]> { let Outs32 = (outs SReg_32:$vdst); let Outs64 = Outs32; - let Ins32 = (ins VGPR_32:$src0, SCSrc_b32:$src1); + let Ins32 = (ins VRegOrLds_32:$src0, SCSrc_b32:$src1); let Ins64 = Ins32; let Asm32 = " $vdst, $src0, $src1"; let Asm64 = Asm32; @@ -765,7 +765,7 @@ defm V_READLANE_B32 : VOP2_Real_si <0x01>; -let InOperandList = (ins SSrc_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in { +let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in { defm V_WRITELANE_B32 : VOP2_Real_si <0x02>; } Index: llvm/trunk/test/MC/AMDGPU/lds_direct-ci.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/lds_direct-ci.s +++ llvm/trunk/test/MC/AMDGPU/lds_direct-ci.s @@ -0,0 +1,10 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=CI + +v_readfirstlane_b32 s0, lds_direct +// CI: v_readfirstlane_b32 s0, src_lds_direct ; encoding: [0xfe,0x04,0x00,0x7e] + +v_readlane_b32 s0, lds_direct, s0 +// CI: v_readlane_b32 s0, src_lds_direct, s0 ; encoding: [0xfe,0x00,0x00,0x02] + +v_writelane_b32 v0, lds_direct, s0 +// CI: v_writelane_b32 v0, src_lds_direct, s0 ; encoding: [0xfe,0x00,0x00,0x04] Index: llvm/trunk/test/MC/AMDGPU/lds_direct-err.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/lds_direct-err.s 
+++ llvm/trunk/test/MC/AMDGPU/lds_direct-err.s @@ -5,55 +5,77 @@ //---------------------------------------------------------------------------// s_and_b32 s2, lds_direct, s1 -// NOGFX9: error +// NOGFX9: error: invalid operand for instruction //---------------------------------------------------------------------------// -// lds_direct may not be used with V_{LSHL,LSHR,ASHL}REV opcodes +// lds_direct may not be used with "REV" opcodes //---------------------------------------------------------------------------// v_ashrrev_i16 v0, lds_direct, v0 -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct v_ashrrev_i32 v0, lds_direct, v0 -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct v_lshlrev_b16 v0, lds_direct, v0 -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct v_lshlrev_b32 v0, lds_direct, v0 -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct v_lshrrev_b16 v0, lds_direct, v0 -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct v_lshrrev_b32 v0, lds_direct, v0 -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct v_pk_ashrrev_i16 v0, lds_direct, v0 -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct v_pk_lshlrev_b16 v0, lds_direct, v0 -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct v_pk_lshrrev_b16 v0, lds_direct, v0 -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct + +v_subbrev_co_u32 v0, vcc, src_lds_direct, v0, vcc +// NOGFX9: error: invalid use of lds_direct + +v_subrev_co_u32 v0, vcc, src_lds_direct, v0 +// NOGFX9: error: invalid use of lds_direct + +v_subrev_f16 v0, src_lds_direct, v0 +// NOGFX9: error: invalid use of lds_direct + +v_subrev_u16 v0, src_lds_direct, v0 +// NOGFX9: error: invalid use of lds_direct + +v_subrev_u32 v0, src_lds_direct, v0 +// NOGFX9: error: invalid use of lds_direct + +//---------------------------------------------------------------------------// +// lds_direct may not be used with v_writelane_b32 for VI/GFX9 
+//---------------------------------------------------------------------------// + +v_writelane_b32 v0, lds_direct, s0 +// NOGFX9: error: instruction not supported on this GPU //---------------------------------------------------------------------------// // lds_direct cannot be used with 64-bit and larger operands //---------------------------------------------------------------------------// v_add_f64 v[0:1], lds_direct, v[0:1] -// NOGFX9: error +// NOGFX9: error: invalid operand for instruction //---------------------------------------------------------------------------// // Only SRC0 may specify lds_direct //---------------------------------------------------------------------------// v_add_i32 v0, v0, lds_direct -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct v_add_i32 lds_direct, v0, v0 -// NOGFX9: error +// NOGFX9: error: invalid operand for instruction v_fma_f32 v0, v0, v0, lds_direct -// NOGFX9: error +// NOGFX9: error: invalid use of lds_direct Index: llvm/trunk/test/MC/AMDGPU/lds_direct.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/lds_direct.s +++ llvm/trunk/test/MC/AMDGPU/lds_direct.s @@ -31,6 +31,9 @@ v_cvt_f16_u16 v0, src_lds_direct // GFX9: v_cvt_f16_u16_e32 v0, src_lds_direct ; encoding: [0xfe,0x72,0x00,0x7e] +v_readfirstlane_b32 s0, src_lds_direct +// GFX9: v_readfirstlane_b32 s0, src_lds_direct ; encoding: [0xfe,0x04,0x00,0x7e] + //---------------------------------------------------------------------------// // VOP2/3 //---------------------------------------------------------------------------// @@ -78,6 +81,9 @@ v_max3_f16 v0, src_lds_direct, v0, v0 // GFX9: v_max3_f16 v0, src_lds_direct, v0, v0 ; encoding: [0x00,0x00,0xf7,0xd1,0xfe,0x00,0x02,0x04] +v_readlane_b32 s0, src_lds_direct, s0 +// GFX9: v_readlane_b32 s0, src_lds_direct, s0 ; encoding: [0x00,0x00,0x89,0xd2,0xfe,0x00,0x00,0x00] + //---------------------------------------------------------------------------// // VOP3P 
//---------------------------------------------------------------------------// @@ -107,10 +113,3 @@ v_cmp_lt_f16 vcc, lds_direct, v0 // GFX9: v_cmp_lt_f16_e32 vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0x42,0x7c] - -//---------------------------------------------------------------------------// -// FIXME: enable lds_direct for the following opcodes and add tests -//---------------------------------------------------------------------------// - -//v_readfirstlane_b32 s0, src_lds_direct -//v_readlane_b32 s0, src_lds_direct, s0 Index: llvm/trunk/test/MC/Disassembler/AMDGPU/lds_direct_gfx9.txt =================================================================== --- llvm/trunk/test/MC/Disassembler/AMDGPU/lds_direct_gfx9.txt +++ llvm/trunk/test/MC/Disassembler/AMDGPU/lds_direct_gfx9.txt @@ -17,3 +17,9 @@ # GFX9: v_cmpx_le_i32_e32 vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0xa6,0x7d] 0xfe,0x00,0xa6,0x7d + +# GFX9: v_readlane_b32 s0, src_lds_direct, s0 ; encoding: [0x00,0x00,0x89,0xd2,0xfe,0x00,0x00,0x00] +0x00,0x00,0x89,0xd2,0xfe,0x00,0x00,0x00 + +# GFX9: v_readfirstlane_b32 s0, src_lds_direct ; encoding: [0xfe,0x04,0x00,0x7e] +0xfe,0x04,0x00,0x7e