Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -148,7 +148,7 @@ bool defaultTokenHasSuffix() const { StringRef Token(Tok.Data, Tok.Length); - return Token.endswith("_e32") || Token.endswith("_e64") || + return Token.endswith("_e32") || Token.endswith("_e64") || Token.endswith("_dpp"); } @@ -165,7 +165,7 @@ immediates are inlinable (e.g. "clamp" attribute is not) */ ) return false; // TODO: We should avoid using host float here. It would be better to - // check the float bit values which is what a few other places do. + // check the float bit values which is what a few other places do. // We've had bot failures before due to weird NaN support on mips hosts. const float F = BitsToFloat(Imm.Val); // TODO: Add 1/(2*pi) for VI @@ -250,7 +250,7 @@ bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } - + void setModifiers(unsigned Mods) { assert(isReg() || (isImm() && Imm.Modifiers == 0)); if (isReg()) @@ -307,7 +307,7 @@ } bool isVSrc64() const { - // TODO: Check if the 64-bit value (coming from assembly source) can be + // TODO: Check if the 64-bit value (coming from assembly source) can be // narrowed to 32 bits (in the instruction stream). That require knowledge // of instruction type (unsigned/signed, floating or "untyped"/B64), // see [AMD GCN3 ISA 6.3.1]. @@ -343,7 +343,7 @@ case Immediate: if (Imm.Type != AMDGPUOperand::ImmTyNone) OS << getImm(); - else + else OS << '<' << getImm() << " mods: " << Imm.Modifiers << '>'; break; case Token: @@ -1264,8 +1264,8 @@ typedef std::map OptionalImmIndexMap; -void addOptionalImmOperand(MCInst& Inst, const OperandVector& Operands, - OptionalImmIndexMap& OptionalIdx, +void addOptionalImmOperand(MCInst& Inst, const OperandVector& Operands, + OptionalImmIndexMap& OptionalIdx, enum AMDGPUOperand::ImmTy ImmT, int64_t Default = 0) { auto i = OptionalIdx.find(ImmT); if (i != OptionalIdx.end()) { @@ -1959,60 +1959,102 @@ return false; } -AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseDPPCtrlOps(OperandVector &Operands) { // ToDo: use same syntax as sp3 for dpp_ctrl SMLoc S = Parser.getTok().getLoc(); StringRef Prefix; int64_t Int; - - switch(getLexer().getKind()) { - default: return MatchOperand_NoMatch; - case AsmToken::Identifier: { - Prefix = Parser.getTok().getString(); + if (getLexer().getKind() == AsmToken::Identifier) { + Prefix = Parser.getTok().getString(); + } else { + return MatchOperand_NoMatch; + } + + if (Prefix == "row_mirror") { + Int = 0x140; + } else if (Prefix == "row_half_mirror") { + Int = 0x141; + } else { + Parser.Lex(); + if (getLexer().isNot(AsmToken::Colon)) + return MatchOperand_ParseFail; + + if (Prefix == "quad_perm") { + // quad_perm:[%d,%d,%d,%d] Parser.Lex(); - if (getLexer().isNot(AsmToken::Colon)) + if (getLexer().isNot(AsmToken::LBrac)) return MatchOperand_ParseFail; Parser.Lex(); if (getLexer().isNot(AsmToken::Integer)) return MatchOperand_ParseFail; + Int = getLexer().getTok().getIntVal(); - if (getParser().parseAbsoluteExpression(Int)) + Parser.Lex(); + if (getLexer().isNot(AsmToken::Comma)) return MatchOperand_ParseFail; - break; - } - } + Parser.Lex(); + if (getLexer().isNot(AsmToken::Integer)) + return MatchOperand_ParseFail; + Int += (getLexer().getTok().getIntVal() << 2); - if (Prefix.equals("row_shl")) { - Int |= 0x100; - } else if (Prefix.equals("row_shr")) { - Int |= 0x110; - } else if (Prefix.equals("row_ror")) { - Int |= 0x120; - } else if (Prefix.equals("wave_shl")) { - Int = 0x130; - } else if (Prefix.equals("wave_rol")) { - Int = 0x134; - } else if (Prefix.equals("wave_shr")) { - Int = 0x138; - } else if (Prefix.equals("wave_ror")) { - Int = 0x13C; - } else if (Prefix.equals("row_mirror")) { - Int = 0x140; - } else if (Prefix.equals("row_half_mirror")) { - Int = 0x141; - } else if (Prefix.equals("row_bcast")) { - if (Int == 15) { - Int = 0x142; - } else if (Int == 31) { - Int = 0x143; + Parser.Lex(); + if (getLexer().isNot(AsmToken::Comma)) + return MatchOperand_ParseFail; + Parser.Lex(); + if (getLexer().isNot(AsmToken::Integer)) + return MatchOperand_ParseFail; + Int += (getLexer().getTok().getIntVal() << 4); + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Comma)) + return MatchOperand_ParseFail; + Parser.Lex(); + if (getLexer().isNot(AsmToken::Integer)) + return MatchOperand_ParseFail; + Int += (getLexer().getTok().getIntVal() << 6); + + Parser.Lex(); + if (getLexer().isNot(AsmToken::RBrac)) + return MatchOperand_ParseFail; + + } else { + // sel:%d + Parser.Lex(); + if (getLexer().isNot(AsmToken::Integer)) + return MatchOperand_ParseFail; + Int = getLexer().getTok().getIntVal(); + + if (Prefix == "row_shl") { + Int |= 0x100; + } else if (Prefix == "row_shr") { + Int |= 0x110; + } else if (Prefix == "row_ror") { + Int |= 0x120; + } else if (Prefix == "wave_shl") { + Int = 0x130; + } else if (Prefix == "wave_rol") { + Int = 0x134; + } else if (Prefix == "wave_shr") { + Int = 0x138; + } else if (Prefix == "wave_ror") { + Int = 0x13C; + } else if (Prefix == "row_bcast") { + if (Int == 15) { + Int = 0x142; + } else if (Int == 31) { + Int = 0x143; + } + } else { + return MatchOperand_NoMatch; + } } - } else if (!Prefix.equals("quad_perm")) { - return MatchOperand_NoMatch; } - Operands.push_back(AMDGPUOperand::CreateImm(Int, S, + Parser.Lex(); // eat last token + + Operands.push_back(AMDGPUOperand::CreateImm(Int, S, AMDGPUOperand::ImmTyDppCtrl)); return MatchOperand_Success; } @@ -2023,7 +2065,7 @@ {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, -1, nullptr} }; -AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseDPPOptionalOps(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); OperandMatchResultTy Res = parseOptionalOps(DPPOptionalOps, Operands); @@ -2049,7 +2091,7 @@ cvtDPP(Inst, Operands, false); } -void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, +void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool HasMods) { OptionalImmIndexMap OptionalIdx; Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -405,8 +405,11 @@ raw_ostream &O) { unsigned Imm = MI->getOperand(OpNo).getImm(); if (Imm <= 0x0ff) { - O << " quad_perm:"; - printU8ImmDecOperand(MI, OpNo, O); + O << " quad_perm:["; + O << formatDec(Imm & 0x3) << ","; + O << formatDec((Imm & 0xc) >> 2) << ","; + O << formatDec((Imm & 0x30) >> 4) << ","; + O << formatDec((Imm & 0xc0) >> 6) << "]"; } else if ((Imm >= 0x101) && (Imm <= 0x10f)) { O << " row_shl:"; printU4ImmDecOperand(MI, OpNo, O); @@ -425,9 +428,9 @@ } else if (Imm == 0x13c) { O << " wave_ror:1"; } else if (Imm == 0x140) { - O << " row_mirror:1"; + O << " row_mirror"; } else if (Imm == 0x141) { - O << " row_half_mirror:1"; + O << " row_half_mirror"; } else if (Imm == 0x142) { O << " row_bcast:15"; } else if (Imm == 0x143) { Index: test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll @@ -5,7 +5,7 @@ ; VI-LABEL: {{^}}dpp_test: ; VI: v_mov_b32_e32 v0, s{{[0-9]+}} ; VI: s_nop 1 -; VI: v_mov_b32_dpp v0, v0 quad_perm:1 row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11] +; VI: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11] define void @dpp_test(i32 addrspace(1)* %out, i32 %in) { %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 1) #0 store i32 %tmp0, i32 addrspace(1)* %out @@ -15,9 +15,9 @@ ; VI-LABEL: {{^}}dpp_wait_states: ; VI: v_mov_b32_e32 [[VGPR0:v[0-9]+]], s{{[0-9]+}} ; VI: s_nop 1 -; VI: v_mov_b32_dpp [[VGPR1:v[0-9]+]], [[VGPR0]] quad_perm:1 row_mask:0x1 bank_mask:0x1 bound_ctrl:0 +; VI: v_mov_b32_dpp [[VGPR1:v[0-9]+]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; VI: s_nop 1 -; VI: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:1 row_mask:0x1 bank_mask:0x1 bound_ctrl:0 +; VI: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 define void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) { %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 1) #0 %tmp1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %tmp0, i32 1, i32 1, i32 1, i1 1) #0 @@ -27,11 +27,11 @@ ; VI-LABEL: {{^}}dpp_first_in_bb: ; VI: s_nop 1 -; VI: v_mov_b32_dpp [[VGPR0:v[0-9]+]], v{{[0-9]+}} quad_perm:1 row_mask:0x1 bank_mask:0x1 bound_ctrl:0 +; VI: v_mov_b32_dpp [[VGPR0:v[0-9]+]], v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; VI: s_nop 1 -; VI: v_mov_b32_dpp [[VGPR1:v[0-9]+]], [[VGPR0]] quad_perm:1 row_mask:0x1 bank_mask:0x1 bound_ctrl:0 +; VI: v_mov_b32_dpp [[VGPR1:v[0-9]+]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; VI: s_nop 1 -; VI: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:1 row_mask:0x1 bank_mask:0x1 bound_ctrl:0 +; VI: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 define void @dpp_first_in_bb(float addrspace(1)* %out, float addrspace(1)* %in, float %cond, float %a, float %b) { %cmp = fcmp oeq float %cond, 0.0 br i1 %cmp, label %if, label %else Index: test/MC/AMDGPU/vop_dpp.s =================================================================== --- test/MC/AMDGPU/vop_dpp.s +++ test/MC/AMDGPU/vop_dpp.s @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// // NOSICI: error: -// VI: v_mov_b32_dpp v0, v0 quad_perm:37 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x25,0x00,0xff] -v_mov_b32 v0, v0 quad_perm:37 +// VI: v_mov_b32_dpp v0, v0 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x58,0x00,0xff] +v_mov_b32 v0, v0 quad_perm:[0,2,1,1] // NOSICI: error: // VI: v_mov_b32_dpp v0, v0 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x01,0xff] @@ -40,12 +40,12 @@ v_mov_b32 v0, v0 wave_ror:1 // NOSICI: error: -// VI: v_mov_b32_dpp v0, v0 row_mirror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x40,0x01,0xff] -v_mov_b32 v0, v0 row_mirror:1 +// VI: v_mov_b32_dpp v0, v0 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x40,0x01,0xff] +v_mov_b32 v0, v0 row_mirror // NOSICI: error: -// VI: v_mov_b32_dpp v0, v0 row_half_mirror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x41,0x01,0xff] -v_mov_b32 v0, v0 row_half_mirror:1 +// VI: v_mov_b32_dpp v0, v0 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x41,0x01,0xff] +v_mov_b32 v0, v0 row_half_mirror // NOSICI: error: // VI: v_mov_b32_dpp v0, v0 row_bcast:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x42,0x01,0xff] @@ -60,32 +60,32 @@ //===----------------------------------------------------------------------===// // NOSICI: error: -// VI: v_mov_b32_dpp v0, v0 quad_perm:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0xa1] -v_mov_b32 v0, v0 quad_perm:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 +// VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xa1] +v_mov_b32 v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 bound_ctrl:0 // NOSICI: error: -// VI: v_mov_b32_dpp v0, v0 quad_perm:1 row_mask:0xa bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x00,0xaf] -v_mov_b32 v0, v0 quad_perm:1 row_mask:0xa +// VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xaf] +v_mov_b32 v0, v0 quad_perm:[1,3,0,1] row_mask:0xa // NOSICI: error: -// VI: v_mov_b32_dpp v0, v0 quad_perm:1 row_mask:0xf bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x00,0xf1] -v_mov_b32 v0, v0 quad_perm:1 bank_mask:0x1 +// VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xf1] +v_mov_b32 v0, v0 quad_perm:[1,3,0,1] bank_mask:0x1 // NOSICI: error: -// VI: v_mov_b32_dpp v0, v0 quad_perm:1 row_mask:0xf bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0xff] -v_mov_b32 v0, v0 quad_perm:1 bound_ctrl:0 +// VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xff] +v_mov_b32 v0, v0 quad_perm:[1,3,0,1] bound_ctrl:0 // NOSICI: error: -// VI: v_mov_b32_dpp v0, v0 quad_perm:1 row_mask:0xa bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x00,0xa1] -v_mov_b32 v0, v0 quad_perm:1 row_mask:0xa bank_mask:0x1 +// VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xa1] +v_mov_b32 v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 // NOSICI: error: -// VI: v_mov_b32_dpp v0, v0 quad_perm:1 row_mask:0xa bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0xaf] -v_mov_b32 v0, v0 quad_perm:1 row_mask:0xa bound_ctrl:0 +// VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xaf] +v_mov_b32 v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bound_ctrl:0 // NOSICI: error: -// VI: v_mov_b32_dpp v0, v0 quad_perm:1 row_mask:0xf bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0xf1] -v_mov_b32 v0, v0 quad_perm:1 bank_mask:0x1 bound_ctrl:0 +// VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xf1] +v_mov_b32 v0, v0 quad_perm:[1,3,0,1] bank_mask:0x1 bound_ctrl:0 //===----------------------------------------------------------------------===// // Check VOP1 opcodes