diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -454,10 +454,10 @@ "Support DPP8 (Data Parallel Primitives) extension" >; -def Feature64BitDPP : SubtargetFeature<"dpp-64bit", - "Has64BitDPP", +def FeatureDPALU_DPP : SubtargetFeature<"dpp-64bit", + "HasDPALU_DPP", "true", - "Support DPP (Data Parallel Primitives) extension" + "Support DPP (Data Parallel Primitives) extension in DP ALU" >; def FeaturePackedFP32Ops : SubtargetFeature<"packed-fp32-ops", @@ -1179,7 +1179,7 @@ !listconcat(FeatureISAVersion9_0_MI_Common.Features, [FeatureGFX90AInsts, FeatureFmacF64Inst, - Feature64BitDPP, + FeatureDPALU_DPP, FeaturePackedFP32Ops, FeatureAtomicFaddRtnInsts, FeatureAtomicBufferGlobalPkAddF16Insts, @@ -1213,7 +1213,7 @@ FeatureDot10Insts, FeatureAtomicDsPkAdd16Insts, FeatureAtomicFlatPkAdd16Insts, - Feature64BitDPP, + FeatureDPALU_DPP, FeaturePackedFP32Ops, FeatureMAIInsts, FeatureFP8Insts, @@ -1699,8 +1699,8 @@ def HasDPP8 : Predicate<"Subtarget->hasDPP8()">, AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureDPP8)>; -def Has64BitDPP : Predicate<"Subtarget->has64BitDPP()">, - AssemblerPredicate<(all_of Feature64BitDPP)>; +def HasDPALU_DPP : Predicate<"Subtarget->hasDPALU_DPP()">, + AssemblerPredicate<(all_of FeatureDPALU_DPP)>; def HasPackedFP32Ops : Predicate<"Subtarget->hasPackedFP32Ops()">, AssemblerPredicate<(all_of FeaturePackedFP32Ops)>; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1990,7 +1990,7 @@ return isRegClass(AMDGPU::VGPR_32RegClassID) || // GFX90A allows DPP on 64-bit operands. (isRegClass(AMDGPU::VReg_64RegClassID) && - AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); + AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]); } bool AMDGPUOperand::isT16VRegWithInputMods() const { @@ -4196,15 +4196,12 @@ return true; unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); - if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { - // DPP64 is supported for row_newbcast only. - int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); - if (Src0Idx >= 0 && - getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { - SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); - Error(S, "64 bit dpp only supports row_newbcast"); - return false; - } + if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) && + AMDGPU::isDPALU_DPP(MII.get(Opc))) { + // DP ALU DPP is supported for row_newbcast only on GFX9* + SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); + Error(S, "DP ALU dpp only supports row_newbcast"); + return false; } return true; diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -505,7 +505,7 @@ MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp) { auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl); assert(DppCtrl && DppCtrl->isImm()); - if (!AMDGPU::isLegal64BitDPPControl(DppCtrl->getImm())) { + if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl->getImm())) { LLVM_DEBUG(dbgs() << " failed: 64 bit dpp move uses unsupported" " control value\n"); // Let it split, then control may become legal. @@ -728,7 +728,7 @@ ++NumDPPMovsCombined; } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO || MI.getOpcode() == AMDGPU::V_MOV_B64_dpp) { - if (ST->has64BitDPP() && combineDPPMov(MI)) { + if (ST->hasDPALU_DPP() && combineDPPMov(MI)) { Changed = true; ++NumDPPMovsCombined; } else { diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -125,7 +125,7 @@ bool HasSDWAOutModsVOPC = false; bool HasDPP = false; bool HasDPP8 = false; - bool Has64BitDPP = false; + bool HasDPALU_DPP = false; bool HasPackedFP32Ops = false; bool HasImageInsts = false; bool HasExtendedImageInsts = false; @@ -908,8 +908,8 @@ return HasDPP8; } - bool has64BitDPP() const { - return Has64BitDPP; + bool hasDPALU_DPP() const { + return HasDPALU_DPP; } bool hasPackedFP32Ops() const { diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -846,13 +846,9 @@ unsigned Imm = MI->getOperand(OpNo).getImm(); const MCInstrDesc &Desc = MII.get(MI->getOpcode()); - int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), - AMDGPU::OpName::src0); - if (Src0Idx >= 0 && - Desc.operands()[Src0Idx].RegClass == AMDGPU::VReg_64RegClassID && - !AMDGPU::isLegal64BitDPPControl(Imm)) { - O << " /* 64 bit dpp only supports row_newbcast */"; + if (!AMDGPU::isLegalDPALU_DPPControl(Imm) && AMDGPU::isDPALU_DPP(Desc)) { + O << " /* DP ALU dpp only supports row_newbcast */"; return; } else if (Imm <= DppCtrl::QUAD_PERM_LAST) { O << "quad_perm:["; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2369,7 +2369,7 @@ assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); if (ST.hasMovB64() && - AMDGPU::isLegal64BitDPPControl( + AMDGPU::isLegalDPALU_DPPControl( getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) { MI.setDesc(get(AMDGPU::V_MOV_B64_dpp)); return std::pair(&MI, nullptr); @@ -4809,20 +4809,10 @@ } } - int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); - if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO && - ((DstIdx >= 0 && - (Desc.operands()[DstIdx].RegClass == AMDGPU::VReg_64RegClassID || - Desc.operands()[DstIdx].RegClass == - AMDGPU::VReg_64_Align2RegClassID)) || - ((Src0Idx >= 0 && - (Desc.operands()[Src0Idx].RegClass == AMDGPU::VReg_64RegClassID || - Desc.operands()[Src0Idx].RegClass == - AMDGPU::VReg_64_Align2RegClassID)))) && - !AMDGPU::isLegal64BitDPPControl(DC)) { + !AMDGPU::isLegalDPALU_DPPControl(DC) && AMDGPU::isDPALU_DPP(Desc)) { ErrInfo = "Invalid dpp_ctrl value: " - "64 bit dpp only support row_newbcast"; + "DP ALU dpp only support row_newbcast"; return false; } } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1331,10 +1331,16 @@ bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset); LLVM_READNONE -inline bool isLegal64BitDPPControl(unsigned DC) { +inline bool isLegalDPALU_DPPControl(unsigned DC) { return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST; } +/// \returns true if an instruction may have a 64-bit VGPR operand. +bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc); + +/// \returns true if an instruction is a DP ALU DPP. +bool isDPALU_DPP(const MCInstrDesc &OpDesc); + /// \returns true if the intrinsic is divergent bool isIntrinsicSourceOfDivergence(unsigned IntrID); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -2740,6 +2740,25 @@ : getGfx9BufferFormatInfo(Format); } +bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) { + for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1, + OpName::src2 }) { + int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName); + if (Idx == -1) + continue; + + if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID || + OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID) + return true; + } + + return false; +} + +bool isDPALU_DPP(const MCInstrDesc &OpDesc) { + return hasAny64BitVGPROperands(OpDesc); +} + } // namespace AMDGPU raw_ostream &operator<<(raw_ostream &OS, diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -791,8 +791,8 @@ string AsmOperands = asmOps; let AsmMatchConverter = !if(P.HasModifiers, "cvtDPP", ""); - let SubtargetPredicate = !if(P.HasExt64BitDPP, Has64BitDPP, HasDPP); - let AssemblerPredicate = !if(P.HasExt64BitDPP, Has64BitDPP, HasDPP); + let SubtargetPredicate = !if(P.HasExt64BitDPP, HasDPALU_DPP, HasDPP); + let AssemblerPredicate = !if(P.HasExt64BitDPP, HasDPALU_DPP, HasDPP); let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP, AMDGPUAsmVariants.Disable); let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", ""); @@ -862,8 +862,8 @@ let Size = 8; let AsmMatchConverter = !if(P.HasModifiers, "cvtDPP", ""); - let SubtargetPredicate = !if(P.HasExt64BitDPP, Has64BitDPP, HasDPP); - let AssemblerPredicate = !if(P.HasExt64BitDPP, Has64BitDPP, HasDPP); + let SubtargetPredicate = !if(P.HasExt64BitDPP, HasDPALU_DPP, HasDPP); + let AssemblerPredicate = !if(P.HasExt64BitDPP, HasDPALU_DPP, HasDPP); let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP, AMDGPUAsmVariants.Disable); let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", ""); diff --git a/llvm/test/MC/AMDGPU/gfx90a_err.s b/llvm/test/MC/AMDGPU/gfx90a_err.s --- a/llvm/test/MC/AMDGPU/gfx90a_err.s +++ b/llvm/test/MC/AMDGPU/gfx90a_err.s @@ -139,16 +139,16 @@ // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand v_ceil_f64_dpp v[0:1], v[2:3] quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: 64 bit dpp only supports row_newbcast +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_newbcast v_ceil_f64_dpp v[0:1], v[2:3] row_shl:1 row_mask:0xf bank_mask:0xf -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: 64 bit dpp only supports row_newbcast +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_newbcast v_ceil_f64_dpp v[0:1], v[2:3] wave_ror:1 row_mask:0xf bank_mask:0xf -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: 64 bit dpp only supports row_newbcast +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_newbcast v_cvt_u32_f64 v5, v[0:1] quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: 64 bit dpp only supports row_newbcast +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_newbcast v_ceil_f64_dpp v[0:1], v[2:3] row_share:1 row_mask:0xf bank_mask:0xf // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. diff --git a/llvm/test/MC/AMDGPU/gfx940_asm_features.s b/llvm/test/MC/AMDGPU/gfx940_asm_features.s --- a/llvm/test/MC/AMDGPU/gfx940_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx940_asm_features.s @@ -651,8 +651,8 @@ v_cvt_pk_f32_bf8 v[2:3], s3 src0_sel:WORD_1 // NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_bf8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xae,0x00,0x7e,0x03,0x58,0x00,0xff] -v_cvt_pk_f32_bf8 v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf +// GFX940: v_cvt_pk_f32_bf8_dpp v[0:1], v3 row_newbcast:3 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xae,0x00,0x7e,0x03,0x53,0x01,0xff] +v_cvt_pk_f32_bf8 v[0:1], v3 row_newbcast:3 // NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU // GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08] @@ -687,8 +687,8 @@ v_cvt_pk_f32_fp8 v[2:3], 3 src0_sel:WORD_1 // NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_fp8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xac,0x00,0x7e,0x03,0x58,0x00,0xff] -v_cvt_pk_f32_fp8 v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf +// GFX940: v_cvt_pk_f32_fp8_dpp v[0:1], v3 row_newbcast:3 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xac,0x00,0x7e,0x03,0x53,0x01,0xff] +v_cvt_pk_f32_fp8 v[0:1], v3 row_newbcast:3 // NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU // GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08] diff --git a/llvm/test/MC/AMDGPU/gfx940_err.s b/llvm/test/MC/AMDGPU/gfx940_err.s --- a/llvm/test/MC/AMDGPU/gfx940_err.s +++ b/llvm/test/MC/AMDGPU/gfx940_err.s @@ -28,7 +28,7 @@ // GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mov_b64 v[2:3], v[4:5] row_shl:1 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: 64 bit dpp only supports row_newbcast +// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_newbcast v_mov_b64 v[2:3], -v[4:5] // GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt @@ -450,8 +450,8 @@ # GFX940: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x06,0x85,0x00] 0xf9,0xae,0x04,0x7e,0x03,0x06,0x85,0x00 -# GFX940: v_cvt_pk_f32_bf8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xae,0x00,0x7e,0x03,0x58,0x00,0xff] -0xfa,0xae,0x00,0x7e,0x03,0x58,0x00,0xff +# GFX940: v_cvt_pk_f32_bf8_dpp v[0:1], v3 row_newbcast:3 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xae,0x00,0x7e,0x03,0x53,0x01,0xff] +0xfa,0xae,0x00,0x7e,0x03,0x53,0x01,0xff # GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08] 0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08 @@ -477,8 +477,8 @@ # GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], 3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x83,0x06,0x85,0x00] 0xf9,0xac,0x04,0x7e,0x83,0x06,0x85,0x00 -# GFX940: v_cvt_pk_f32_fp8_dpp v[0:1], v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xac,0x00,0x7e,0x03,0x58,0x00,0xff] -0xfa,0xac,0x00,0x7e,0x03,0x58,0x00,0xff +# GFX940: v_cvt_pk_f32_fp8_dpp v[0:1], v3 row_newbcast:3 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xac,0x00,0x7e,0x03,0x53,0x01,0xff] +0xfa,0xac,0x00,0x7e,0x03,0x53,0x01,0xff # GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08] 0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08