diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3246,9 +3246,15 @@ bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64; bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 || + Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 || + Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; + bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 || + Opc == AMDGPU::V_MAC_LEGACY_F32_e64 || + Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 || + Opc == AMDGPU::V_FMAC_LEGACY_F32_e64; switch (Opc) { default: @@ -3256,13 +3262,17 @@ case AMDGPU::V_MAC_F16_e64: case AMDGPU::V_FMAC_F16_e64: case AMDGPU::V_MAC_F32_e64: + case AMDGPU::V_MAC_LEGACY_F32_e64: case AMDGPU::V_FMAC_F32_e64: + case AMDGPU::V_FMAC_LEGACY_F32_e64: case AMDGPU::V_FMAC_F64_e64: break; case AMDGPU::V_MAC_F16_e32: case AMDGPU::V_FMAC_F16_e32: case AMDGPU::V_MAC_F32_e32: + case AMDGPU::V_MAC_LEGACY_F32_e32: case AMDGPU::V_FMAC_F32_e32: + case AMDGPU::V_FMAC_LEGACY_F32_e32: case AMDGPU::V_FMAC_F64_e32: { int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); @@ -3292,6 +3302,7 @@ const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod); if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 && + !IsLegacy && // If we have an SGPR input, we will violate the constant bus restriction. (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() || !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) { @@ -3361,10 +3372,14 @@ } } - unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64 - : IsF64 ? AMDGPU::V_FMA_F64_e64 - : AMDGPU::V_FMA_F32_e64) - : (IsF16 ? AMDGPU::V_MAD_F16_e64 : AMDGPU::V_MAD_F32_e64); + unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64 + : IsF64 ? AMDGPU::V_FMA_F64_e64 + : IsLegacy + ? AMDGPU::V_FMA_LEGACY_F32_e64 + : AMDGPU::V_FMA_F32_e64 + : IsF16 ? AMDGPU::V_MAD_F16_e64 + : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64 + : AMDGPU::V_MAD_F32_e64; if (pseudoToMCOpcode(NewOpc) == -1) return nullptr; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll @@ -272,8 +272,7 @@ ; GFX6-LABEL: v_mad_legacy_f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_mac_legacy_f32_e32 v2, v0, v1 -; GFX6-NEXT: v_mov_b32_e32 v0, v2 +; GFX6-NEXT: v_mad_legacy_f32 v0, v0, v1, v2 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_mad_legacy_f32: @@ -292,8 +291,7 @@ ; GFX101: ; %bb.0: ; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX101-NEXT: v_mac_legacy_f32_e32 v2, v0, v1 -; GFX101-NEXT: v_mov_b32_e32 v0, v2 +; GFX101-NEXT: v_mad_legacy_f32 v0, v0, v1, v2 ; GFX101-NEXT: s_setpc_b64 s[30:31] ; ; GFX103-LABEL: v_mad_legacy_f32: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll @@ -7,8 +7,7 @@ ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_waitcnt_vscnt null, 0x0 -; GCN-NEXT: v_fmac_legacy_f32_e32 v2, v0, v1 -; GCN-NEXT: v_mov_b32_e32 v0, v2 +; GCN-NEXT: v_fma_legacy_f32 v0, v0, v1, v2 ; GCN-NEXT: s_setpc_b64 s[30:31] %fma = call float @llvm.amdgcn.fma.legacy(float %a, float %b, float %c) ret float %fma diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll @@ -65,10 +65,10 @@ } ; GCN-LABEL: {{^}}test_mad_legacy_f32_imm: -; GFX6: v_mac_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +; GFX6: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} ; GFX8: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} ; GFX9: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; GFX101: v_mac_legacy_f32_e64 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}} +; GFX101: v_mad_legacy_f32 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}} ; GFX103: v_mul_legacy_f32_e64 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}} ; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @test_mad_legacy_f32_imm(float addrspace(1)* %out, float %a, float %c) #2 {