Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1306,6 +1306,7 @@
   bool validateOpSel(const MCInst &Inst);
   bool validateVccOperand(unsigned Reg) const;
   bool validateVOP3Literal(const MCInst &Inst) const;
+  unsigned getConstantBusLimit(unsigned Opcode) const;
   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
@@ -2697,6 +2698,31 @@
   }
 }
 
+/// Returns the maximum number of constant bus uses permitted for \p Opcode.
+///
+/// Returns 1 when the subtarget is not GFX10. On GFX10 it returns 2, except
+/// for the 64-bit shift opcodes listed below, which are limited to 1.
+unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
+  if (!isGFX10())
+    return 1;
+
+  switch (Opcode) {
+  // 64-bit shift instructions can use only one scalar value input
+  case AMDGPU::V_LSHLREV_B64:
+  case AMDGPU::V_LSHLREV_B64_gfx10:
+  case AMDGPU::V_LSHL_B64:
+  case AMDGPU::V_LSHRREV_B64:
+  case AMDGPU::V_LSHRREV_B64_gfx10:
+  case AMDGPU::V_LSHR_B64:
+  case AMDGPU::V_ASHRREV_I64:
+  case AMDGPU::V_ASHRREV_I64_gfx10:
+  case AMDGPU::V_ASHR_I64:
+    return 1;
+  default:
+    return 2;
+  }
+}
+
 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
   const MCOperand &MO = Inst.getOperand(OpIdx);
   if (MO.isImm()) {
@@ -2782,10 +2808,7 @@
   }
   ConstantBusUseCount += NumLiterals;
 
-  if (isGFX10())
-    return ConstantBusUseCount <= 2;
-
-  return ConstantBusUseCount <= 1;
+  return ConstantBusUseCount <= getConstantBusLimit(Opcode);
 }
 
 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
Index: test/MC/AMDGPU/gfx10-constant-bus.s
===================================================================
--- test/MC/AMDGPU/gfx10-constant-bus.s
+++ test/MC/AMDGPU/gfx10-constant-bus.s
@@ -2,7 +2,7 @@
 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10-ERR %s
 
 //-----------------------------------------------------------------------------------------
-// On GFX10 we can use two scalar operands
+// On GFX10 we can use two scalar operands (except for 64-bit shift instructions)
 
 v_add_f32 v0, s0, s1
 // GFX10: v_add_f32_e64 v0, s0, s1 ; encoding: [0x00,0x00,0x03,0xd5,0x00,0x02,0x00,0x00]
@@ -14,6 +14,15 @@
 // GFX10: v_med3_f32 v0, s0, s0, s1 ; encoding: [0x00,0x00,0x57,0xd5,0x00,0x00,0x04,0x00]
 
 //-----------------------------------------------------------------------------------------
+// 64-bit shift instructions can use only one scalar value input
+
+v_ashrrev_i64 v[0:1], 0x100, s[0:1]
+// GFX10-ERR: error: invalid operand (violates constant bus restrictions)
+
+v_ashrrev_i64 v[0:1], s2, s[0:1]
+// GFX10-ERR: error: invalid operand (violates constant bus restrictions)
+
+//-----------------------------------------------------------------------------------------
 // v_div_fmas implicitly reads VCC, so only one scalar operand is possible
 
 v_div_fmas_f32 v5, s3, s3, s3
@@ -38,6 +47,9 @@
 // v_mad_u64_u32 has operands of different sizes.
 // When these operands are literals, they are counted as 2 scalar values even if literals are identical.
 
+v_lshlrev_b64 v[5:6], 0x3f717273, 0x3f717273
+// GFX10-ERR: error: invalid operand (violates constant bus restrictions)
+
 v_mad_u64_u32 v[5:6], s12, v1, 0x12345678, 0x12345678
 // GFX10: v_mad_u64_u32 v[5:6], s12, v1, 0x12345678, 0x12345678 ; encoding: [0x05,0x0c,0x76,0xd5,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12]
 
Index: test/MC/AMDGPU/vop3-literal.s
===================================================================
--- test/MC/AMDGPU/vop3-literal.s
+++ test/MC/AMDGPU/vop3-literal.s
@@ -305,10 +305,6 @@
 // GFX10: v_lshlrev_b64 v[5:6], v1, 0x3f717273 ; encoding: [0x05,0x00,0xff,0xd6,0x01,0xff,0x01,0x00,0x73,0x72,0x71,0x3f]
 // GFX9-ERR: error: invalid literal operand
 
-v_lshlrev_b64 v[5:6], 0x3f717273, 0x3f717273
-// GFX10: v_lshlrev_b64 v[5:6], 0x3f717273, 0x3f717273 ; encoding: [0x05,0x00,0xff,0xd6,0xff,0xfe,0x01,0x00,0x73,0x72,0x71,0x3f]
-// GFX9-ERR: error: invalid literal operand
-
 v_fma_mix_f32 v5, 0x123, v2, v3
 // GFX10: v_fma_mix_f32 v5, 0x123, v2, v3 ; encoding: [0x05,0x00,0x20,0xcc,0xff,0x04,0x0e,0x04,0x23,0x01,0x00,0x00]
 