Index: llvm/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2569,6 +2569,18 @@
   (i32 (DivergentUnaryFrag i32:$src)),
   (V_MAX_I32_e64 (V_SUB_CO_U32_e32 (i32 0), $src), $src)>;
 
+// Fuse fadd(fpround(fmul(f32 a, f32 b)), f16 c) into a single
+// V_FMA_MIXLO_F16. Only subtargets with the fma_mix instructions may select
+// it, and source modifier 8 (op_sel_hi) marks $src2 as an f16 operand while
+// $src0/$src1 keep the default f32 interpretation.
+// NOTE(review): the fused form drops the intermediate f32->f16 rounding and
+// is not conditioned on fast-math flags; confirm that is intended.
+let SubtargetPredicate = HasFmaMixInsts in
+def : GCNPat<
+  (f16 (fadd (f16 (fpround (f32 (fmul f32:$src0, f32:$src1)))), f16:$src2)),
+  (V_FMA_MIXLO_F16 0, $src0, 0, $src1, 8, $src2, 0, $src0, 0, 0)
+>;
+
 let AddedComplexity = 1 in {
 def : GCNPat<
   (i32 (DivergentUnaryFrag i32:$src)),
Index: llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -2153,6 +2153,107 @@
   ret i32 %cvt.result.i32
 }
 
+define { float } @mixlo_fptrunc(float %a, float %b, half %c) #0 {
+; SDAG-GFX1100-LABEL: mixlo_fptrunc:
+; SDAG-GFX1100:       ; %bb.0: ; %.entry
+; SDAG-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX1100-NEXT:    s_waitcnt_vscnt null, 0x0
+; SDAG-GFX1100-NEXT:    v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[0,0,1]
+; SDAG-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX1100-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; SDAG-GFX1100-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-GFX900-LABEL: mixlo_fptrunc:
+; SDAG-GFX900:       ; %bb.0: ; %.entry
+; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX900-NEXT:    v_mul_f32_e32 v0, v0, v1
+; SDAG-GFX900-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; SDAG-GFX900-NEXT:    v_add_f16_e32 v0, v0, v2
+; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-GFX906-LABEL: mixlo_fptrunc:
+; SDAG-GFX906:       ; %bb.0: ; %.entry
+; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX906-NEXT:    v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[0,0,1]
+; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-VI-LABEL: mixlo_fptrunc:
+; SDAG-VI:       ; %bb.0: ; %.entry
+; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT:    v_mul_f32_e32 v0, v0, v1
+; SDAG-VI-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; SDAG-VI-NEXT:    v_add_f16_e32 v0, v0, v2
+; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-CI-LABEL: mixlo_fptrunc:
+; SDAG-CI:       ; %bb.0: ; %.entry
+; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-CI-NEXT:    v_mul_f32_e32 v0, v0, v1
+; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v1, v2
+; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SDAG-CI-NEXT:    v_add_f32_e32 v0, v0, v1
+; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX1100-LABEL: mixlo_fptrunc:
+; GISEL-GFX1100:       ; %bb.0: ; %.entry
+; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX1100-NEXT:    s_waitcnt_vscnt null, 0x0
+; GISEL-GFX1100-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX1100-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GISEL-GFX1100-NEXT:    v_add_f16_e32 v0, v0, v2
+; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX1100-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX900-LABEL: mixlo_fptrunc:
+; GISEL-GFX900:       ; %bb.0: ; %.entry
+; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX900-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GISEL-GFX900-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GISEL-GFX900-NEXT:    v_add_f16_e32 v0, v0, v2
+; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX906-LABEL: mixlo_fptrunc:
+; GISEL-GFX906:       ; %bb.0: ; %.entry
+; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX906-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GISEL-GFX906-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GISEL-GFX906-NEXT:    v_add_f16_e32 v0, v0, v2
+; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-VI-LABEL: mixlo_fptrunc:
+; GISEL-VI:       ; %bb.0: ; %.entry
+; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-VI-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GISEL-VI-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GISEL-VI-NEXT:    v_add_f16_e32 v0, v0, v2
+; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-CI-LABEL: mixlo_fptrunc:
+; GISEL-CI:       ; %bb.0: ; %.entry
+; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-CI-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GISEL-CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v2
+; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GISEL-CI-NEXT:    v_add_f32_e32 v0, v0, v1
+; GISEL-CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
+.entry:
+  %0 = fmul reassoc nnan nsz arcp contract afn float %a, %b
+  %1 = fptrunc float %0 to half
+  %2 = fadd reassoc nnan nsz arcp contract afn half %1, %c
+  %3 = bitcast half %2 to i16
+  %4 = zext i16 %3 to i32
+  %5 = bitcast i32 %4 to float
+  %6 = insertvalue { float } undef, float %5, 0
+  ret { float } %6
+}
+
 declare half @llvm.minnum.f16(half, half) #1
 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
 declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>) #1