Index: llvm/lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPU.td
+++ llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1705,6 +1705,9 @@
 def HasPackedFP32Ops : Predicate<"Subtarget->hasPackedFP32Ops()">,
   AssemblerPredicate<(all_of FeaturePackedFP32Ops)>;
 
+def HasPkMovB32 : Predicate<"Subtarget->hasPkMovB32()">,
+  AssemblerPredicate<(all_of FeatureGFX90AInsts)>;
+
 def HasFmaakFmamkF32Insts : Predicate<"Subtarget->hasFmaakFmamkF32Insts()">,
   AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX940Insts)>;
 
Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h
===================================================================
--- llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -916,6 +916,11 @@
     return HasPackedFP32Ops;
   }
 
+  // Has V_PK_MOV_B32 opcode
+  bool hasPkMovB32() const {
+    return GFX90AInsts;
+  }
+
   bool hasFmaakFmamkF32Insts() const {
     return getGeneration() >= GFX10 || hasGFX940Insts();
   }
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -939,7 +939,7 @@
         .addReg(SrcReg, getKillRegState(KillSrc));
       return;
     }
-    if (ST.hasPackedFP32Ops()) {
+    if (ST.hasPkMovB32()) {
       BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestReg)
         .addImm(SISrcMods::OP_SEL_1)
         .addReg(SrcReg)
@@ -986,7 +986,7 @@
   if (ST.hasMovB64()) {
     Opcode = AMDGPU::V_MOV_B64_e32;
     EltSize = 8;
-  } else if (ST.hasPackedFP32Ops()) {
+  } else if (ST.hasPkMovB32()) {
     Opcode = AMDGPU::V_PK_MOV_B32;
     EltSize = 8;
   }
@@ -2026,7 +2026,7 @@
     APInt Imm(64, SrcOp.getImm());
     APInt Lo(32, Imm.getLoBits(32).getZExtValue());
     APInt Hi(32, Imm.getHiBits(32).getZExtValue());
-    if (ST.hasPackedFP32Ops() && Lo == Hi && isInlineConstant(Lo)) {
+    if (ST.hasPkMovB32() && Lo == Hi && isInlineConstant(Lo)) {
       BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst)
         .addImm(SISrcMods::OP_SEL_1)
         .addImm(Lo.getSExtValue())
@@ -2047,7 +2047,7 @@
     }
   } else {
     assert(SrcOp.isReg());
-    if (ST.hasPackedFP32Ops() &&
+    if (ST.hasPkMovB32() &&
         !RI.isAGPR(MBB.getParent()->getRegInfo(), SrcOp.getReg())) {
       BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst)
         .addImm(SISrcMods::OP_SEL_1) // src0_mod
Index: llvm/lib/Target/AMDGPU/VOP3PInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -738,11 +738,15 @@
 }
 
-let SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1, isReMaterializable = 1 in {
-  defm V_PK_FMA_F32 : VOP3PInst<"v_pk_fma_f32", VOP3P_Profile, any_fma>;
-  defm V_PK_MUL_F32 : VOP3PInst<"v_pk_mul_f32", VOP3P_Profile, any_fmul>;
-  defm V_PK_ADD_F32 : VOP3PInst<"v_pk_add_f32", VOP3P_Profile, any_fadd>;
+let isCommutable = 1, isReMaterializable = 1 in {
+  let SubtargetPredicate = HasPackedFP32Ops in {
+    defm V_PK_FMA_F32 : VOP3PInst<"v_pk_fma_f32", VOP3P_Profile, any_fma>;
+    defm V_PK_MUL_F32 : VOP3PInst<"v_pk_mul_f32", VOP3P_Profile, any_fmul>;
+    defm V_PK_ADD_F32 : VOP3PInst<"v_pk_add_f32", VOP3P_Profile, any_fadd>;
+  } // End SubtargetPredicate = HasPackedFP32Ops
+
+  let SubtargetPredicate = HasPkMovB32 in
   defm V_PK_MOV_B32 : VOP3PInst<"v_pk_mov_b32", VOP3P_Profile>;
-} // End SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1
+} // End isCommutable = 1, isReMaterializable = 1
 
 def : MnemonicAlias<"v_accvgpr_read", "v_accvgpr_read_b32">;
 def : MnemonicAlias<"v_accvgpr_write", "v_accvgpr_write_b32">;