diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -359,6 +359,12 @@ case AMDGPU::V_FMA_F32_e64: NewOpcode = AMDGPU::V_FMAAK_F32; break; + case AMDGPU::V_MAD_F16_e64: + NewOpcode = AMDGPU::V_MADAK_F16; + break; + case AMDGPU::V_FMA_F16_e64: + NewOpcode = AMDGPU::V_FMAAK_F16; + break; } } @@ -380,6 +386,12 @@ case AMDGPU::V_FMA_F32_e64: NewOpcode = AMDGPU::V_FMAMK_F32; break; + case AMDGPU::V_MAD_F16_e64: + NewOpcode = AMDGPU::V_MADMK_F16; + break; + case AMDGPU::V_FMA_F16_e64: + NewOpcode = AMDGPU::V_FMAMK_F16; + break; } } @@ -806,9 +818,10 @@ if (!TII->isVOP3(MI)) continue; - // TODO: Also shrink F16 forms. if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 || - MI.getOpcode() == AMDGPU::V_FMA_F32_e64) { + MI.getOpcode() == AMDGPU::V_FMA_F32_e64 || + MI.getOpcode() == AMDGPU::V_MAD_F16_e64 || + MI.getOpcode() == AMDGPU::V_FMA_F16_e64) { shrinkMadFma(MI); continue; } diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir --- a/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir @@ -128,8 +128,8 @@ ; GFX10-LABEL: name: mad_cvv_f16 ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, 18688, 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]] + ; GFX10-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = V_MADMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit [[V_MADMK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_MAD_F16_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec @@ -143,8 +143,8 @@ ; GFX10-LABEL: name: mad_vcv_f16 ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, [[DEF]], 0, 18688, 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]] + ; GFX10-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = V_MADMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit [[V_MADMK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec @@ -158,8 +158,8 @@ ; GFX10-LABEL: name: mad_vvc_f16 ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]] + ; GFX10-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = V_MADAK_F16 [[DEF]], [[DEF1]], 18688, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit [[V_MADAK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec @@ -173,8 +173,8 @@ ; GFX10-LABEL: name: mad_vsc_f16 ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]] + ; GFX10-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = V_MADAK_F16 [[DEF1]], [[DEF]], 18688, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit [[V_MADAK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32 = IMPLICIT_DEF %2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec @@ -188,8 +188,8 @@ ; GFX10-LABEL: name: fma_cvv_f16 ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, 18688, 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]] + ; GFX10-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = V_FMAMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_FMA_F16_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec @@ -203,8 +203,8 @@ ; GFX10-LABEL: name: fma_vcv_f16 ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, [[DEF]], 0, 18688, 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]] + ; GFX10-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = V_FMAMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec @@ -218,8 +218,8 @@ ; GFX10-LABEL: name: fma_vvc_f16 ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]] + ; GFX10-NEXT: [[V_FMAAK_F16_:%[0-9]+]]:vgpr_32 = V_FMAAK_F16 [[DEF]], [[DEF1]], 18688, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec @@ -233,8 +233,8 @@ ; GFX10-LABEL: name: fma_vsc_f16 ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]] + ; GFX10-NEXT: [[V_FMAAK_F16_:%[0-9]+]]:vgpr_32 = V_FMAAK_F16 [[DEF1]], [[DEF]], 18688, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32 = IMPLICIT_DEF %2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec