Index: llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td @@ -438,13 +438,20 @@ let Predicates = [Has16BitInsts, isGFX9]; } +def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile, fma> { + let Predicates = [Has16BitInsts, isVIOnly]; +} +def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile, fma> { + let renamedInGFX9 = 1; + let Predicates = [Has16BitInsts, isGFX9]; +} + let SubtargetPredicate = Has16BitInsts, isCommutable = 1 in { let renamedInGFX9 = 1 in { def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile, fmad>; def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile>; def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile>; -def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile, fma>; def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>>; } @@ -452,7 +459,6 @@ def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile>; def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile>; def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile>; -def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile>; def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>; } // End SubtargetPredicate = isGFX9 Index: llvm/trunk/test/CodeGen/AMDGPU/fdot2.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/fdot2.ll +++ llvm/trunk/test/CodeGen/AMDGPU/fdot2.ll @@ -8,16 +8,16 @@ ; Tests to make sure fdot2 is not generated when vector elements of dot-product expressions ; are not converted from f16 to f32. ; GCN-LABEL: {{^}}dotproduct_f16 -; GFX900: v_fma_legacy_f16 -; GFX900: v_fma_legacy_f16 +; GFX900: v_fma_f16 +; GFX900: v_fma_f16 ; GFX906: v_mul_f16_e32 ; GFX906: v_mul_f16_e32 -; GFX906-UNSAFE: v_fma_legacy_f16 +; GFX906-UNSAFE: v_fma_f16 ; GFX906-CONTRACT: v_mac_f16_e32 -; GFX906-DENORM-CONTRACT: v_fma_legacy_f16 +; GFX906-DENORM-CONTRACT: v_fma_f16 define amdgpu_kernel void @dotproduct_f16(<2 x half> addrspace(1)* %src1, <2 x half> addrspace(1)* %src2, half addrspace(1)* nocapture %dst) { Index: llvm/trunk/test/CodeGen/AMDGPU/fpext-free.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/fpext-free.ll +++ llvm/trunk/test/CodeGen/AMDGPU/fpext-free.ll @@ -171,7 +171,7 @@ ; GCN-LABEL: {{^}}fadd_fpext_fmuladd_f16_to_f32: ; GFX9: v_mul_f16 -; GFX9: v_fma_legacy_f16 +; GFX9: v_fma_f16 ; GFX9: v_cvt_f32_f16 ; GFX9: v_add_f32_e32 define float @fadd_fpext_fmuladd_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 { @@ -185,7 +185,7 @@ ; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32: ; GFX9: v_mul_f16 -; GFX9: v_fma_legacy_f16 +; GFX9: v_fma_f16 ; GFX9: v_cvt_f32_f16 ; GFX9: v_add_f32_e32 define float @fadd_fpext_fma_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 { @@ -199,7 +199,7 @@ ; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32_commute: ; GFX9: v_mul_f16 -; GFX9: v_fma_legacy_f16 +; GFX9: v_fma_f16 ; GFX9: v_cvt_f32_f16 ; GFX9: v_add_f32_e32 define float @fadd_fpext_fma_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 { @@ -322,7 +322,7 @@ ; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32: ; GFX9: v_mul_f16 -; GFX9: v_fma_legacy_f16 +; GFX9: v_fma_f16 ; GFX9: v_cvt_f32_f16 ; GFX9: v_sub_f32 ; GCN: s_setpc_b64 @@ -363,7 +363,7 @@ ; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32_commute: ; GCN: s_waitcnt ; GFX9-NEXT: v_mul_f16_e32 v3, v3, v4 -; GFX9-NEXT: v_fma_legacy_f16 v1, v1, v2, v3 +; GFX9-NEXT: v_fma_f16 v1, v1, v2, v3 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64