// Patch summary: introduce a dedicated `HasPkMovB32` / `hasPkMovB32()` predicate
// and use it, instead of `HasPackedFP32Ops` / `hasPackedFP32Ops()`, wherever the
// V_PK_MOV_B32 opcode is defined or emitted.
//
// Files touched by the hunks below:
//   * AMDGPU.td            - adds `def HasPkMovB32`, a Predicate on
//                            "Subtarget->hasPkMovB32()" whose AssemblerPredicate
//                            is (all_of FeatureGFX90AInsts).
//   * GCNSubtarget.h       - adds the `hasPkMovB32()` accessor, which returns
//                            `GFX90AInsts`.
//   * SIInstrInfo.cpp      - four conditions that lead to V_PK_MOV_B32 being
//                            chosen or built now test `ST.hasPkMovB32()` rather
//                            than `ST.hasPackedFP32Ops()`; nothing else on those
//                            paths changes.
//   * VOP3PInstructions.td - V_PK_FMA_F32 / V_PK_MUL_F32 / V_PK_ADD_F32 move into
//                            a nested `let SubtargetPredicate = HasPackedFP32Ops`
//                            block, and V_PK_MOV_B32 gets its own
//                            `let SubtargetPredicate = HasPkMovB32 in`.
//
// NOTE(review): this copy of the patch is collapsed onto two physical lines, so
// the per-line ' ' / '+' / '-' markers are no longer in column 0. The original
// line breaks and indentation must be restored before it can be applied.
// NOTE(review): the hunk counts only add up if the text directly after each
// "@@ ... @@" is treated as the first context line rather than a section
// heading - confirm when restoring the line breaks.
// NOTE(review): every `VOP3P_Profile` below appears without a template-argument
// list; this looks like angle-bracketed text was dropped during extraction -
// confirm against the upstream revision before relying on these lines.
// NOTE(review): the outer `let` in VOP3PInstructions.td is an unchanged context
// line and still sets `SubtargetPredicate = HasPackedFP32Ops`, while the new
// closing comment names only isCommutable / isReMaterializable. Presumably the
// inner `let`s take precedence, making the outer predicate redundant - confirm
// and align either the outer `let` or the closing comment.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1705,6 +1705,9 @@ def HasPackedFP32Ops : Predicate<"Subtarget->hasPackedFP32Ops()">, AssemblerPredicate<(all_of FeaturePackedFP32Ops)>; +def HasPkMovB32 : Predicate<"Subtarget->hasPkMovB32()">, + AssemblerPredicate<(all_of FeatureGFX90AInsts)>; + def HasFmaakFmamkF32Insts : Predicate<"Subtarget->hasFmaakFmamkF32Insts()">, AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX940Insts)>; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -916,6 +916,11 @@ return HasPackedFP32Ops; } + // Has V_PK_MOV_B32 opcode + bool hasPkMovB32() const { + return GFX90AInsts; + } + bool hasFmaakFmamkF32Insts() const { return getGeneration() >= GFX10 || hasGFX940Insts(); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -939,7 +939,7 @@ .addReg(SrcReg, getKillRegState(KillSrc)); return; } - if (ST.hasPackedFP32Ops()) { + if (ST.hasPkMovB32()) { BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestReg) .addImm(SISrcMods::OP_SEL_1) .addReg(SrcReg) @@ -986,7 +986,7 @@ if (ST.hasMovB64()) { Opcode = AMDGPU::V_MOV_B64_e32; EltSize = 8; - } else if (ST.hasPackedFP32Ops()) { + } else if (ST.hasPkMovB32()) { Opcode = AMDGPU::V_PK_MOV_B32; EltSize = 8; } @@ -2026,7 +2026,7 @@ APInt Imm(64, SrcOp.getImm()); APInt Lo(32, Imm.getLoBits(32).getZExtValue()); APInt Hi(32, Imm.getHiBits(32).getZExtValue()); - if (ST.hasPackedFP32Ops() && Lo == Hi && isInlineConstant(Lo)) { + if (ST.hasPkMovB32() && Lo == Hi && isInlineConstant(Lo)) { BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst) .addImm(SISrcMods::OP_SEL_1) .addImm(Lo.getSExtValue()) @@ -2047,7 +2047,7 @@ } } else { 
assert(SrcOp.isReg()); - if (ST.hasPackedFP32Ops() && + if (ST.hasPkMovB32() && !RI.isAGPR(MBB.getParent()->getRegInfo(), SrcOp.getReg())) { BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst) .addImm(SISrcMods::OP_SEL_1) // src0_mod diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -738,11 +738,15 @@ } let SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1, isReMaterializable = 1 in { - defm V_PK_FMA_F32 : VOP3PInst<"v_pk_fma_f32", VOP3P_Profile, any_fma>; - defm V_PK_MUL_F32 : VOP3PInst<"v_pk_mul_f32", VOP3P_Profile, any_fmul>; - defm V_PK_ADD_F32 : VOP3PInst<"v_pk_add_f32", VOP3P_Profile, any_fadd>; + let SubtargetPredicate = HasPackedFP32Ops in { + defm V_PK_FMA_F32 : VOP3PInst<"v_pk_fma_f32", VOP3P_Profile, any_fma>; + defm V_PK_MUL_F32 : VOP3PInst<"v_pk_mul_f32", VOP3P_Profile, any_fmul>; + defm V_PK_ADD_F32 : VOP3PInst<"v_pk_add_f32", VOP3P_Profile, any_fadd>; + } // End SubtargetPredicate = HasPackedFP32Ops + + let SubtargetPredicate = HasPkMovB32 in defm V_PK_MOV_B32 : VOP3PInst<"v_pk_mov_b32", VOP3P_Profile>; -} // End SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1 +} // End isCommutable = 1, isReMaterializable = 1 def : MnemonicAlias<"v_accvgpr_read", "v_accvgpr_read_b32">; def : MnemonicAlias<"v_accvgpr_write", "v_accvgpr_write_b32">;