Index: include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- include/llvm/IR/IntrinsicsAMDGPU.td +++ include/llvm/IR/IntrinsicsAMDGPU.td @@ -241,11 +241,11 @@ // VI Intrinsics //===----------------------------------------------------------------------===// -// llvm.amdgcn.mov.dpp.i32 +// llvm.amdgcn.mov.dpp.i32 def int_amdgcn_mov_dpp : Intrinsic<[llvm_anyint_ty], - [LLVMMatchType<0>, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty, - llvm_i32_ty], [IntrNoMem, IntrConvergent]>; + [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i1_ty], [IntrNoMem, IntrConvergent]>; def int_amdgcn_s_dcache_wb : GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">, Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -68,6 +68,10 @@ ImmTyTFE, ImmTyClamp, ImmTyOMod, + ImmTyDppCtrl, + ImmTyDppRowMask, + ImmTyDppBankMask, + ImmTyDppBoundCtrl, ImmTyDMask, ImmTyUNorm, ImmTyDA, @@ -143,7 +147,8 @@ bool defaultTokenHasSuffix() const { StringRef Token(Tok.Data, Tok.Length); - return Token.endswith("_e32") || Token.endswith("_e64"); + return Token.endswith("_e32") || Token.endswith("_e64") || + Token.endswith("_dpp"); } bool isToken() const override { @@ -233,6 +238,18 @@ bool isSLC() const { return isImmTy(ImmTySLC); } bool isTFE() const { return isImmTy(ImmTyTFE); } + bool isBankMask() const { + return isImmTy(ImmTyDppBankMask); + } + + bool isRowMask() const { + return isImmTy(ImmTyDppRowMask); + } + + bool isBoundCtrl() const { + return isImmTy(ImmTyDppBoundCtrl); + } + void setModifiers(unsigned Mods) { assert(isReg() || (isImm() && Imm.Modifiers == 0)); if (isReg()) @@ -390,6 +407,7 @@ bool isMubufOffset() const; bool isSMRDOffset() const; bool isSMRDLiteralOffset() const; + bool isDPPCtrl() const; }; class AMDGPUAsmParser : public MCTargetAsmParser { @@ -438,7 
+456,6 @@ bool ParseSectionDirectiveHSARodataReadonlyAgent(); public: -public: enum AMDGPUMatchResultTy { Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY }; @@ -536,6 +553,12 @@ void cvtMIMG(MCInst &Inst, const OperandVector &Operands); OperandMatchResultTy parseVOP3OptionalOps(OperandVector &Operands); + + OperandMatchResultTy parseDPPCtrlOps(OperandVector &Operands); + OperandMatchResultTy parseDPPOptionalOps(OperandVector &Operands); + void cvtDPP_mod(MCInst &Inst, const OperandVector &Operands); + void cvtDPP_nomod(MCInst &Inst, const OperandVector &Operands); + void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool HasMods); }; struct OptionalOperand { @@ -1280,7 +1303,6 @@ AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int, int64_t Default) { - // We are at the end of the statement, and this is a default argument, so // use a default value. if (getLexer().is(AsmToken::EndOfStatement)) { @@ -1360,13 +1382,15 @@ typedef std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalImmIndexMap; -void addOptionalImmOperand(MCInst& Inst, const OperandVector& Operands, OptionalImmIndexMap& OptionalIdx, enum AMDGPUOperand::ImmTy ImmT) { +void addOptionalImmOperand(MCInst& Inst, const OperandVector& Operands, + OptionalImmIndexMap& OptionalIdx, + enum AMDGPUOperand::ImmTy ImmT, int64_t Default = 0) { auto i = OptionalIdx.find(ImmT); if (i != OptionalIdx.end()) { unsigned Idx = i->second; ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); } else { - Inst.addOperand(MCOperand::createImm(0)); + Inst.addOperand(MCOperand::createImm(Default)); } } @@ -1987,6 +2011,152 @@ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); } +//===----------------------------------------------------------------------===// +// dpp +//===----------------------------------------------------------------------===// + +bool AMDGPUOperand::isDPPCtrl() const { + bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); + if 
(result) { + int64_t Imm = getImm(); + return ((Imm >= 0x000) && (Imm <= 0x0ff)) || + ((Imm >= 0x101) && (Imm <= 0x10f)) || + ((Imm >= 0x111) && (Imm <= 0x11f)) || + ((Imm >= 0x121) && (Imm <= 0x12f)) || + (Imm == 0x130) || + (Imm == 0x134) || + (Imm == 0x138) || + (Imm == 0x13c) || + (Imm == 0x140) || + (Imm == 0x141) || + (Imm == 0x142) || + (Imm == 0x143); + } + return false; +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseDPPCtrlOps(OperandVector &Operands) { + // ToDo: use same syntax as sp3 for dpp_ctrl + SMLoc S = Parser.getTok().getLoc(); + StringRef Prefix; + int64_t Int; + + switch(getLexer().getKind()) { + default: return MatchOperand_NoMatch; + case AsmToken::Identifier: { + Prefix = Parser.getTok().getString(); + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Colon)) + return MatchOperand_ParseFail; + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Integer)) + return MatchOperand_ParseFail; + + if (getParser().parseAbsoluteExpression(Int)) + return MatchOperand_ParseFail; + break; + } + } + + if (Prefix.equals("row_shl")) { + Int |= 0x100; + } else if (Prefix.equals("row_shr")) { + Int |= 0x110; + } else if (Prefix.equals("row_ror")) { + Int |= 0x120; + } else if (Prefix.equals("wave_shl")) { + Int = 0x130; + } else if (Prefix.equals("wave_rol")) { + Int = 0x134; + } else if (Prefix.equals("wave_shr")) { + Int = 0x138; + } else if (Prefix.equals("wave_ror")) { + Int = 0x13C; + } else if (Prefix.equals("row_mirror")) { + Int = 0x140; + } else if (Prefix.equals("row_half_mirror")) { + Int = 0x141; + } else if (Prefix.equals("row_bcast")) { + if (Int == 15) { + Int = 0x142; + } else if (Int == 31) { + Int = 0x143; + } + } else if (!Prefix.equals("quad_perm")) { + return MatchOperand_NoMatch; + } + Operands.push_back(AMDGPUOperand::CreateImm(Int, S, + AMDGPUOperand::ImmTyDppCtrl)); + return MatchOperand_Success; +} + +static const OptionalOperand DPPOptionalOps [] = { + {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, 0xf, 
nullptr}, + {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, 0xf, nullptr}, + {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, -1, nullptr} +}; + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseDPPOptionalOps(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + OperandMatchResultTy Res = parseOptionalOps(DPPOptionalOps, Operands); + // XXX - sp3 use syntax "bound_ctrl:0" to indicate that bound_ctrl bit was set + if (Res == MatchOperand_Success) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back()); + // If last operand was parsed as bound_ctrl we should replace it with correct value (1) + if (Op.isImmTy(AMDGPUOperand::ImmTyDppBoundCtrl)) { + Operands.pop_back(); + Operands.push_back( + AMDGPUOperand::CreateImm(1, S, AMDGPUOperand::ImmTyDppBoundCtrl)); + return MatchOperand_Success; + } + } + return Res; +} + +void AMDGPUAsmParser::cvtDPP_mod(MCInst &Inst, const OperandVector &Operands) { + cvtDPP(Inst, Operands, true); +} + +void AMDGPUAsmParser::cvtDPP_nomod(MCInst &Inst, const OperandVector &Operands) { + cvtDPP(Inst, Operands, false); +} + +void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, + bool HasMods) { + OptionalImmIndexMap OptionalIdx; + + unsigned I = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); + } + + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + // Add the register arguments + if (!HasMods && Op.isReg()) { + Op.addRegOperands(Inst, 1); + } else if (HasMods && Op.isRegOrImmWithInputMods()) { + Op.addRegOrImmWithInputModsOperands(Inst, 2); + } else if (Op.isDPPCtrl()) { + Op.addImmOperands(Inst, 1); + } else if (Op.isImm()) { + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = I; + } else { + llvm_unreachable("Invalid operand type"); + } + } + + // ToDo: fix default values for row_mask and 
bank_mask + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); +} /// Force static initialization. extern "C" void LLVMInitializeAMDGPUAsmParser() { Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -33,8 +33,10 @@ const MCRegisterInfo &MRI); private: + void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU4ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU32ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); @@ -61,6 +63,10 @@ void printImmediate64(uint64_t I, raw_ostream &O); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printOperandAndMods(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printDPPCtrlOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printRowMaskOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBankMaskOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBoundCtrlOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); static void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp 
=================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -28,6 +28,11 @@ printAnnotation(OS, Annot); } +void AMDGPUInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << formatHex(MI->getOperand(OpNo).getImm() & 0xf); +} + void AMDGPUInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { O << formatHex(MI->getOperand(OpNo).getImm() & 0xff); @@ -43,6 +48,11 @@ O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff); } +void AMDGPUInstPrinter::printU4ImmDecOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << formatDec(MI->getOperand(OpNo).getImm() & 0xf); +} + void AMDGPUInstPrinter::printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { O << formatDec(MI->getOperand(OpNo).getImm() & 0xff); @@ -251,6 +261,8 @@ raw_ostream &O) { if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3) O << "_e64 "; + else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::DPP) + O << "_dpp "; else O << "_e32 "; @@ -388,6 +400,63 @@ O << '|'; } + +void AMDGPUInstPrinter::printDPPCtrlOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNo).getImm(); + if ((Imm >= 0x000) && (Imm <= 0x0ff)) { + O << " quad_perm:"; + printU8ImmDecOperand(MI, OpNo, O); + } else if ((Imm >= 0x101) && (Imm <= 0x10f)) { + O << " row_shl:"; + printU4ImmDecOperand(MI, OpNo, O); + } else if ((Imm >= 0x111) && (Imm <= 0x11f)) { + O << " row_shr:"; + printU4ImmDecOperand(MI, OpNo, O); + } else if ((Imm >= 0x121) && (Imm <= 0x12f)) { + O << " row_ror:"; + printU4ImmDecOperand(MI, OpNo, O); + } else if (Imm == 0x130) { + O << " wave_shl:1"; + } else if (Imm == 0x134) { + O << " wave_rol:1"; + } else if (Imm == 0x138) { + O << " wave_shr:1"; + } else if (Imm == 0x13c) { + O << " wave_ror:1"; + } else if (Imm == 0x140) { + O << " row_mirror:1"; + } else 
if (Imm == 0x141) { + O << " row_half_mirror:1"; + } else if (Imm == 0x142) { + O << " row_bcast:15"; + } else if (Imm == 0x143) { + O << " row_bcast:31"; + } else { + llvm_unreachable("Invalid dpp_ctrl value"); + } +} + +void AMDGPUInstPrinter::printRowMaskOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << " row_mask:"; + printU4ImmOperand(MI, OpNo, O); +} + +void AMDGPUInstPrinter::printBankMaskOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << " bank_mask:"; + printU4ImmOperand(MI, OpNo, O); +} + +void AMDGPUInstPrinter::printBoundCtrlOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNo).getImm(); + if (Imm) { + O << " bound_ctrl:0"; // XXX - this syntax is used in sp3 + } +} + void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); Index: lib/Target/AMDGPU/SIDefines.h =================================================================== --- lib/Target/AMDGPU/SIDefines.h +++ lib/Target/AMDGPU/SIDefines.h @@ -29,16 +29,17 @@ VOP2 = 1 << 11, VOP3 = 1 << 12, VOPC = 1 << 13, - - MUBUF = 1 << 14, - MTBUF = 1 << 15, - SMRD = 1 << 16, - DS = 1 << 17, - MIMG = 1 << 18, - FLAT = 1 << 19, - WQM = 1 << 20, - VGPRSpill = 1 << 21, - VOPAsmPrefer32Bit = 1 << 22 + DPP = 1 << 14, + + MUBUF = 1 << 15, + MTBUF = 1 << 16, + SMRD = 1 << 17, + DS = 1 << 18, + MIMG = 1 << 19, + FLAT = 1 << 20, + WQM = 1 << 21, + VGPRSpill = 1 << 22, + VOPAsmPrefer32Bit = 1 << 23 }; } Index: lib/Target/AMDGPU/SIInstrFormats.td =================================================================== --- lib/Target/AMDGPU/SIInstrFormats.td +++ lib/Target/AMDGPU/SIInstrFormats.td @@ -31,6 +31,7 @@ field bits<1> VOP2 = 0; field bits<1> VOP3 = 0; field bits<1> VOPC = 0; + field bits<1> DPP = 0; field bits<1> MUBUF = 0; field bits<1> MTBUF = 0; @@ -63,16 +64,17 @@ let TSFlags{11} = VOP2; let TSFlags{12} = VOP3; let TSFlags{13} = VOPC; - - let TSFlags{14} 
= MUBUF; - let TSFlags{15} = MTBUF; - let TSFlags{16} = SMRD; - let TSFlags{17} = DS; - let TSFlags{18} = MIMG; - let TSFlags{19} = FLAT; - let TSFlags{20} = WQM; - let TSFlags{21} = VGPRSpill; - let TSFlags{22} = VOPAsmPrefer32Bit; + let TSFlags{14} = DPP; + + let TSFlags{15} = MUBUF; + let TSFlags{16} = MTBUF; + let TSFlags{17} = SMRD; + let TSFlags{18} = DS; + let TSFlags{19} = MIMG; + let TSFlags{20} = FLAT; + let TSFlags{21} = WQM; + let TSFlags{22} = VGPRSpill; + let TSFlags{23} = VOPAsmPrefer32Bit; let SchedRW = [Write32Bit]; Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -534,6 +534,22 @@ "isSMRDLiteralOffset" >; +def DPPCtrlMatchClass : AsmOperandClass { + let Name = "DPPCtrl"; + let PredicateMethod = "isDPPCtrl"; + let ParserMethod = "parseDPPCtrlOps"; + let RenderMethod = "addImmOperands"; + let IsOptional = 0; +} + +class DPPOptionalMatchClass : AsmOperandClass { + let Name = "DPPOptional"#OpName; + let PredicateMethod = "is"#OpName; + let ParserMethod = "parseDPPOptionalOps"; + let RenderMethod = "addImmOperands"; + let IsOptional = 1; +} + class OptionalImmAsmOperand : AsmOperandClass { let Name = "Imm"#OpName; let PredicateMethod = "isImm"; @@ -668,6 +684,26 @@ let ParserMatchClass = NamedBitMatchClass<"LWE">; } +def dpp_ctrl : Operand { + let PrintMethod = "printDPPCtrlOperand"; + let ParserMatchClass = DPPCtrlMatchClass; +} + +def row_mask : Operand { + let PrintMethod = "printRowMaskOperand"; + let ParserMatchClass = DPPOptionalMatchClass<"RowMask">; +} + +def bank_mask : Operand { + let PrintMethod = "printBankMaskOperand"; + let ParserMatchClass = DPPOptionalMatchClass<"BankMask">; +} + +def bound_ctrl : Operand { + let PrintMethod = "printBoundCtrlOperand"; + let ParserMatchClass = DPPOptionalMatchClass<"BoundCtrl">; +} + } // End OperandType = "OPERAND_IMMEDIATE" @@ -1280,24 +1316,25 @@ !if 
(!eq(HasModifiers, 1), // VOP1_DPP with modifiers (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0, - i32imm:$dpp_ctrl, i1imm:$bound_ctrl, - i32imm:$bank_mask, i32imm:$row_mask) + dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl) /* else */, // VOP1_DPP without modifiers - (ins Src0RC:$src0, i32imm:$dpp_ctrl, i1imm:$bound_ctrl, - i32imm:$bank_mask, i32imm:$row_mask) + (ins Src0RC:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl) /* endif */) - /* NumSrcArgs == 2 */, + /* NumSrcArgs == 2 */, !if (!eq(HasModifiers, 1), // VOP2_DPP with modifiers (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0, - InputModsNoDefault:$src1_modifiers, Src1RC:$src1, - i32imm:$dpp_ctrl, i1imm:$bound_ctrl, - i32imm:$bank_mask, i32imm:$row_mask) + InputModsNoDefault:$src1_modifiers, Src1RC:$src1, + dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl) /* else */, // VOP2_DPP without modifiers - (ins Src0RC:$src0, Src1RC:$src1, i32imm:$dpp_ctrl, i1imm:$bound_ctrl, - i32imm:$bank_mask, i32imm:$row_mask) + (ins Src0RC:$src0, Src1RC:$src1, dpp_ctrl:$dpp_ctrl, + row_mask:$row_mask, bank_mask:$bank_mask, + bound_ctrl:$bound_ctrl) /* endif */)); } @@ -1338,8 +1375,8 @@ " $src1_modifiers,")); string args = !if(!eq(HasModifiers, 0), getAsm32<0, NumSrcArgs, DstVT>.ret, - src0#src1); - string ret = " "#dst#args#", $dpp_ctrl, "#"$bound_ctrl, "#"$bank_mask, "#"$row_mask"; + ", "#src0#src1); + string ret = dst#args#" $dpp_ctrl $row_mask $bank_mask $bound_ctrl"; } class VOPProfile _ArgVT> { @@ -1351,7 +1388,7 @@ field ValueType Src1VT = ArgVT[2]; field ValueType Src2VT = ArgVT[3]; field RegisterOperand DstRC = getVALUDstForVT.ret; - field RegisterClass DstRCDPP = !if(!eq(DstVT.Size, 64), VReg_64, VGPR_32); + field RegisterOperand DstRCDPP = getVALUDstForVT.ret; field RegisterOperand Src0RC32 = getVOPSrc0ForVT.ret; field RegisterClass Src1RC32 = getVOPSrc1ForVT.ret; field 
RegisterOperand Src0RC64 = getVOP3SrcForVT.ret; @@ -1497,8 +1534,14 @@ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2); let Ins64 = getIns64, 3, HasModifiers>.ret; + let InsDPP = (ins InputModsNoDefault:$src0_modifiers, Src0RC32:$src0, + InputModsNoDefault:$src1_modifiers, Src1RC32:$src1, + VGPR_32:$src2, // stub argument + dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); let Asm32 = getAsm32<1, 2, f32>.ret; let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret; + let AsmDPP = getAsmDPP<1, 2, HasModifiers, f32>.ret; } def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>; def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>; @@ -1607,9 +1650,9 @@ class VOP1_DPP : VOP1_DPPe , - VOP_DPP { + VOP_DPP { let AssemblerPredicates = [isVI]; - let src0_modifiers = !if(p.HasModifiers, ?, 0); + let src0_modifiers = !if(p.HasModifiers, ?, 0); let src1_modifiers = 0; } @@ -1667,6 +1710,14 @@ } +class VOP2_DPP : + VOP2_DPPe , + VOP_DPP { + let AssemblerPredicates = [isVI]; + let src0_modifiers = !if(p.HasModifiers, ?, 0); + let src1_modifiers = !if(p.HasModifiers, ?, 0); +} + class VOP3DisableFields { bits<2> src0_modifiers = !if(HasModifiers, ?, 0); @@ -1929,6 +1980,8 @@ defm _e64 : VOP3_2_m ; + + def _dpp : VOP2_DPP ; } multiclass VOP2Inst pattern> : +class VOP_DPP pattern, bit HasMods = 0> : VOPAnyCommon { + let DPP = 1; let Size = 8; + + let AsmMatchConverter = !if(!eq(HasMods,1), "cvtDPP_mod", "cvtDPP_nomod"); } class VOP_DPPe : Enc64 { @@ -203,7 +206,7 @@ let Inst{31-25} = 0x3f; //encoding } -class VOP2_DPPe op> : Enc32 { +class VOP2_DPPe op> : VOP_DPPe { bits<8> vdst; bits<8> src1; Index: lib/Target/AMDGPU/VIInstructions.td =================================================================== --- lib/Target/AMDGPU/VIInstructions.td +++ lib/Target/AMDGPU/VIInstructions.td @@ -121,10 +121,10 @@ //===----------------------------------------------------------------------===// def : Pat < - (int_amdgcn_mov_dpp 
i32:$src, imm:$dpp_ctrl, imm:$bound_ctrl, - imm:$bank_mask, imm:$row_mask), - (V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i1imm $bound_ctrl), - (as_i32imm $bank_mask), (as_i32imm $row_mask)) + (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask, + imm:$bound_ctrl), + (V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask), + (as_i32imm $bank_mask), (as_i1imm $bound_ctrl)) >; //===----------------------------------------------------------------------===// Index: test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll @@ -1,13 +1,13 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI %s ; VI-LABEL: {{^}}dpp_test: -; VI: v_mov_b32 v0, v0, 1, -1, 1, 1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11] +; VI: v_mov_b32_dpp v0, v0 quad_perm:1 row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11] define void @dpp_test(i32 addrspace(1)* %out, i32 %in) { - %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i1 1, i32 1, i32 1) #0 + %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 1) #0 store i32 %tmp0, i32 addrspace(1)* %out ret void } -declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32, i1, i32, i32) #0 +declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32, i32, i32, i1) #0 attributes #0 = { nounwind readnone convergent } Index: test/MC/AMDGPU/vop_dpp.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/vop_dpp.s @@ -0,0 +1,143 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI +// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=SI 
-show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI + +//===----------------------------------------------------------------------===// +// Check dpp_ctrl values +//===----------------------------------------------------------------------===// + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 quad_perm:37 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x25,0x00,0xff] +v_mov_b32 v0, v0 quad_perm:37 + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x01,0xff] +v_mov_b32 v0, v0 row_shl:1 + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x1f,0x01,0xff] +v_mov_b32 v0, v0 row_shr:0xf + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 row_ror:12 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x2c,0x01,0xff] +v_mov_b32 v0, v0 row_ror:0xc + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 wave_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x30,0x01,0xff] +v_mov_b32 v0, v0 wave_shl:1 + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 wave_rol:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x34,0x01,0xff] +v_mov_b32 v0, v0 wave_rol:1 + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 wave_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x38,0x01,0xff] +v_mov_b32 v0, v0 wave_shr:1 + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 wave_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x3c,0x01,0xff] +v_mov_b32 v0, v0 wave_ror:1 + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 row_mirror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x40,0x01,0xff] +v_mov_b32 v0, v0 row_mirror:1 + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 row_half_mirror:1 
row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x41,0x01,0xff] +v_mov_b32 v0, v0 row_half_mirror:1 + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 row_bcast:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x42,0x01,0xff] +v_mov_b32 v0, v0 row_bcast:15 + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 row_bcast:31 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x43,0x01,0xff] +v_mov_b32 v0, v0 row_bcast:31 + +//===----------------------------------------------------------------------===// +// Check optional fields +//===----------------------------------------------------------------------===// + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 quad_perm:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0xa1] +v_mov_b32 v0, v0 quad_perm:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 quad_perm:1 row_mask:0xa bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x00,0xaf] +v_mov_b32 v0, v0 quad_perm:1 row_mask:0xa + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 quad_perm:1 row_mask:0xf bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x00,0xf1] +v_mov_b32 v0, v0 quad_perm:1 bank_mask:0x1 + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 quad_perm:1 row_mask:0xf bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0xff] +v_mov_b32 v0, v0 quad_perm:1 bound_ctrl:0 + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 quad_perm:1 row_mask:0xa bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x00,0xa1] +v_mov_b32 v0, v0 quad_perm:1 row_mask:0xa bank_mask:0x1 + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 quad_perm:1 row_mask:0xa bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0xaf] +v_mov_b32 v0, v0 quad_perm:1 row_mask:0xa bound_ctrl:0 + +// NOSICI: error: +// VI: v_mov_b32_dpp v0, v0 quad_perm:1 row_mask:0xf bank_mask:0x1 bound_ctrl:0 ; encoding: 
[0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0xf1] +v_mov_b32 v0, v0 quad_perm:1 bank_mask:0x1 bound_ctrl:0 + +//===----------------------------------------------------------------------===// +// Check VOP1 opcodes +//===----------------------------------------------------------------------===// +// ToDo: v_nop + +// NOSICI: error: +// VI: v_cvt_u32_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x0e,0x00,0x7e,0x00,0x01,0x09,0xa1] +v_cvt_u32_f32 v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 + +// NOSICI: error: +// VI: v_fract_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x36,0x00,0x7e,0x00,0x01,0x09,0xa1] +v_fract_f32 v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 + +// NOSICI: error: +// VI: v_sin_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x52,0x00,0x7e,0x00,0x01,0x09,0xa1] +v_sin_f32 v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 + +//===----------------------------------------------------------------------===// +// Check VOP2 opcodes +//===----------------------------------------------------------------------===// +// ToDo: VOP2bInst instructions: v_add_u32, v_sub_u32 ... 
(vcc and ApplyMnemonic in AsmMatcherEmitter.cpp) +// ToDo: v_mac_f32 (VOP_MAC) + +// NOSICI: error: +// VI: v_add_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x09,0xa1] +v_add_f32 v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 + +// NOSICI: error: +// VI: v_min_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x14,0x00,0x01,0x09,0xa1] +v_min_f32 v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 + +// NOSICI: error: +// VI: v_and_b32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x26,0x00,0x01,0x09,0xa1] +v_and_b32 v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 + +//===----------------------------------------------------------------------===// +// Check modifiers +//===----------------------------------------------------------------------===// + +// NOSICI: error: +// VI: v_add_f32_dpp v0, -v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x19,0xa1] +v_add_f32 v0, -v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 + +// NOSICI: error: +// VI: v_add_f32_dpp v0, v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x89,0xa1] +v_add_f32 v0, v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 + +// NOSICI: error: +// VI: v_add_f32_dpp v0, -v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x99,0xa1] +v_add_f32 v0, -v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 + +// NOSICI: error: +// VI: v_add_f32_dpp v0, |v0|, -v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x69,0xa1] +v_add_f32 v0, |v0|, -v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0