diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3652,6 +3652,14 @@
                 (Inst $add, $m1, $m2)>;
       def : Pat<(VTI.Vec (fma m1, (fneg m2), add)),
                 (Inst $add, $m1, $m2)>;
+      def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+                                  (VTI.Vec (fma (fneg m1), m2, add)),
+                                  add)),
+                (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+      def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+                                  (VTI.Vec (fma m1, (fneg m2), add)),
+                                  add)),
+                (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
       def : Pat<(VTI.Vec (pred_int (fneg m1), m2, add, pred)),
                 (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
       def : Pat<(VTI.Vec (pred_int m1, (fneg m2), add, pred)),
@@ -3659,6 +3667,10 @@
     } else {
       def : Pat<(VTI.Vec (fma m1, m2, add)),
                 (Inst $add, $m1, $m2)>;
+      def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+                                  (VTI.Vec (fma m1, m2, add)),
+                                  add)),
+                (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
       def : Pat<(VTI.Vec (pred_int m1, m2, add, pred)),
                 (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
     }
@@ -5538,6 +5550,10 @@
     if scalar_addend then {
       def : Pat<(VTI.Vec (fma v1, v2, vs)),
                 (VTI.Vec (Inst v1, v2, is))>;
+      def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+                                  (VTI.Vec (fma v1, v2, vs)),
+                                  v1)),
+                (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
       def : Pat<(VTI.Vec (pred_int v1, v2, vs, pred)),
                 (VTI.Vec (Inst v1, v2, is, ARMVCCThen, pred))>;
     } else {
@@ -5545,6 +5561,14 @@
                 (VTI.Vec (Inst v2, v1, is))>;
       def : Pat<(VTI.Vec (fma vs, v1, v2)),
                 (VTI.Vec (Inst v2, v1, is))>;
+      def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+                                  (VTI.Vec (fma vs, v2, v1)),
+                                  v1)),
+                (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
+      def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+                                  (VTI.Vec (fma v2, vs, v1)),
+                                  v1)),
+                (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
       def : Pat<(VTI.Vec (pred_int v1, vs, v2, pred)),
                 (VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred))>;
       def : Pat<(VTI.Vec (pred_int vs, v1, v2, pred)),
diff --git a/llvm/test/CodeGen/Thumb2/mve-fmas.ll b/llvm/test/CodeGen/Thumb2/mve-fmas.ll
--- a/llvm/test/CodeGen/Thumb2/mve-fmas.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fmas.ll
@@ -481,10 +481,8 @@
 ;
 ; CHECK-MVE-VMLA-LABEL: vfma16_v1_pred:
 ; CHECK-MVE-VMLA:       @ %bb.0: @ %entry
-; CHECK-MVE-VMLA-NEXT:    vmov q3, q0
-; CHECK-MVE-VMLA-NEXT:    vcmp.f16 lt, q1, zr
-; CHECK-MVE-VMLA-NEXT:    vfma.f16 q3, q1, q2
-; CHECK-MVE-VMLA-NEXT:    vpsel q0, q3, q0
+; CHECK-MVE-VMLA-NEXT:    vpt.f16 lt, q1, zr
+; CHECK-MVE-VMLA-NEXT:    vfmat.f16 q0, q1, q2
 ; CHECK-MVE-VMLA-NEXT:    bx lr
 ;
 ; CHECK-MVE-LABEL: vfma16_v1_pred:
@@ -628,10 +626,8 @@
 ;
 ; CHECK-MVE-VMLA-LABEL: vfma16_v2_pred:
 ; CHECK-MVE-VMLA:       @ %bb.0: @ %entry
-; CHECK-MVE-VMLA-NEXT:    vmov q3, q0
-; CHECK-MVE-VMLA-NEXT:    vcmp.f16 lt, q1, zr
-; CHECK-MVE-VMLA-NEXT:    vfma.f16 q3, q1, q2
-; CHECK-MVE-VMLA-NEXT:    vpsel q0, q3, q0
+; CHECK-MVE-VMLA-NEXT:    vpt.f16 lt, q1, zr
+; CHECK-MVE-VMLA-NEXT:    vfmat.f16 q0, q1, q2
 ; CHECK-MVE-VMLA-NEXT:    bx lr
 ;
 ; CHECK-MVE-LABEL: vfma16_v2_pred:
@@ -775,10 +771,8 @@
 ;
 ; CHECK-MVE-VMLA-LABEL: vfms16_pred:
 ; CHECK-MVE-VMLA:       @ %bb.0: @ %entry
-; CHECK-MVE-VMLA-NEXT:    vmov q3, q0
-; CHECK-MVE-VMLA-NEXT:    vcmp.f16 lt, q1, zr
-; CHECK-MVE-VMLA-NEXT:    vfms.f16 q3, q1, q2
-; CHECK-MVE-VMLA-NEXT:    vpsel q0, q3, q0
+; CHECK-MVE-VMLA-NEXT:    vpt.f16 lt, q1, zr
+; CHECK-MVE-VMLA-NEXT:    vfmst.f16 q0, q1, q2
 ; CHECK-MVE-VMLA-NEXT:    bx lr
 ;
 ; CHECK-MVE-LABEL: vfms16_pred:
@@ -926,11 +920,9 @@
 ; CHECK-MVE-VMLA-LABEL: vfmar16_pred:
 ; CHECK-MVE-VMLA:       @ %bb.0: @ %entry
 ; CHECK-MVE-VMLA-NEXT:    vcvtb.f16.f32 s8, s8
-; CHECK-MVE-VMLA-NEXT:    vcmp.f16 lt, q1, zr
 ; CHECK-MVE-VMLA-NEXT:    vmov.f16 r0, s8
-; CHECK-MVE-VMLA-NEXT:    vmov q2, q0
-; CHECK-MVE-VMLA-NEXT:    vfma.f16 q2, q1, r0
-; CHECK-MVE-VMLA-NEXT:    vpsel q0, q2, q0
+; CHECK-MVE-VMLA-NEXT:    vpt.f16 lt, q1, zr
+; CHECK-MVE-VMLA-NEXT:    vfmat.f16 q0, q1, r0
 ; CHECK-MVE-VMLA-NEXT:    bx lr
 ;
 ; CHECK-MVE-LABEL: vfmar16_pred:
@@ -1074,11 +1066,9 @@
 ; CHECK-MVE-VMLA-LABEL: vfma16_pred:
 ; CHECK-MVE-VMLA:       @ %bb.0: @ %entry
 ; CHECK-MVE-VMLA-NEXT:    vcvtb.f16.f32 s8, s8
-; CHECK-MVE-VMLA-NEXT:    vcmp.f16 lt, q1, zr
 ; CHECK-MVE-VMLA-NEXT:    vmov.f16 r0, s8
-; CHECK-MVE-VMLA-NEXT:    vmov q2, q0
-; CHECK-MVE-VMLA-NEXT:    vfmas.f16 q2, q1, r0
-; CHECK-MVE-VMLA-NEXT:    vpsel q0, q2, q0
+; CHECK-MVE-VMLA-NEXT:    vpt.f16 lt, q1, zr
+; CHECK-MVE-VMLA-NEXT:    vfmast.f16 q0, q1, r0
 ; CHECK-MVE-VMLA-NEXT:    bx lr
 ;
 ; CHECK-MVE-LABEL: vfma16_pred:
@@ -1218,10 +1208,8 @@
 ;
 ; CHECK-MVE-VMLA-LABEL: vfma32_v1_pred:
 ; CHECK-MVE-VMLA:       @ %bb.0: @ %entry
-; CHECK-MVE-VMLA-NEXT:    vmov q3, q0
-; CHECK-MVE-VMLA-NEXT:    vcmp.f32 lt, q1, zr
-; CHECK-MVE-VMLA-NEXT:    vfma.f32 q3, q1, q2
-; CHECK-MVE-VMLA-NEXT:    vpsel q0, q3, q0
+; CHECK-MVE-VMLA-NEXT:    vpt.f32 lt, q1, zr
+; CHECK-MVE-VMLA-NEXT:    vfmat.f32 q0, q1, q2
 ; CHECK-MVE-VMLA-NEXT:    bx lr
 ;
 ; CHECK-MVE-LABEL: vfma32_v1_pred:
@@ -1290,10 +1278,8 @@
 ;
 ; CHECK-MVE-VMLA-LABEL: vfma32_v2_pred:
 ; CHECK-MVE-VMLA:       @ %bb.0: @ %entry
-; CHECK-MVE-VMLA-NEXT:    vmov q3, q0
-; CHECK-MVE-VMLA-NEXT:    vcmp.f32 lt, q1, zr
-; CHECK-MVE-VMLA-NEXT:    vfma.f32 q3, q1, q2
-; CHECK-MVE-VMLA-NEXT:    vpsel q0, q3, q0
+; CHECK-MVE-VMLA-NEXT:    vpt.f32 lt, q1, zr
+; CHECK-MVE-VMLA-NEXT:    vfmat.f32 q0, q1, q2
 ; CHECK-MVE-VMLA-NEXT:    bx lr
 ;
 ; CHECK-MVE-LABEL: vfma32_v2_pred:
@@ -1362,10 +1348,8 @@
 ;
 ; CHECK-MVE-VMLA-LABEL: vfms32_pred:
 ; CHECK-MVE-VMLA:       @ %bb.0: @ %entry
-; CHECK-MVE-VMLA-NEXT:    vmov q3, q0
-; CHECK-MVE-VMLA-NEXT:    vcmp.f32 lt, q1, zr
-; CHECK-MVE-VMLA-NEXT:    vfms.f32 q3, q1, q2
-; CHECK-MVE-VMLA-NEXT:    vpsel q0, q3, q0
+; CHECK-MVE-VMLA-NEXT:    vpt.f32 lt, q1, zr
+; CHECK-MVE-VMLA-NEXT:    vfmst.f32 q0, q1, q2
 ; CHECK-MVE-VMLA-NEXT:    bx lr
 ;
 ; CHECK-MVE-LABEL: vfms32_pred:
@@ -1437,10 +1421,8 @@
 ; CHECK-MVE-VMLA-LABEL: vfmar32_pred:
 ; CHECK-MVE-VMLA:       @ %bb.0: @ %entry
 ; CHECK-MVE-VMLA-NEXT:    vmov r0, s8
-; CHECK-MVE-VMLA-NEXT:    vmov q2, q0
-; CHECK-MVE-VMLA-NEXT:    vcmp.f32 lt, q1, zr
-; CHECK-MVE-VMLA-NEXT:    vfma.f32 q2, q1, r0
-; CHECK-MVE-VMLA-NEXT:    vpsel q0, q2, q0
+; CHECK-MVE-VMLA-NEXT:    vpt.f32 lt, q1, zr
+; CHECK-MVE-VMLA-NEXT:    vfmat.f32 q0, q1, r0
 ; CHECK-MVE-VMLA-NEXT:    bx lr
 ;
 ; CHECK-MVE-LABEL: vfmar32_pred:
@@ -1513,10 +1495,8 @@
 ; CHECK-MVE-VMLA-LABEL: vfmas32_pred:
 ; CHECK-MVE-VMLA:       @ %bb.0: @ %entry
 ; CHECK-MVE-VMLA-NEXT:    vmov r0, s8
-; CHECK-MVE-VMLA-NEXT:    vmov q2, q0
-; CHECK-MVE-VMLA-NEXT:    vcmp.f32 lt, q1, zr
-; CHECK-MVE-VMLA-NEXT:    vfmas.f32 q2, q1, r0
-; CHECK-MVE-VMLA-NEXT:    vpsel q0, q2, q0
+; CHECK-MVE-VMLA-NEXT:    vpt.f32 lt, q1, zr
+; CHECK-MVE-VMLA-NEXT:    vfmast.f32 q0, q1, r0
 ; CHECK-MVE-VMLA-NEXT:    bx lr
 ;
 ; CHECK-MVE-LABEL: vfmas32_pred:
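
Note: the new vselect patterns fire on IR of the following shape. This is a
minimal sketch, not copied verbatim from mve-fmas.ll; the function name and
argument names are illustrative assumptions. The key point is that the fma
result is selected against its own addend under a lanewise predicate, which
is what lets instruction selection fold the former vcmp/vpsel pair into a
vpt/vfmat block, as the updated test checks show.

define arm_aapcs_vfpcc <4 x float> @predicated_fma(<4 x float> %dst, <4 x float> %s1, <4 x float> %s2) {
entry:
  ; fma with %dst as the addend: the (fma m1, m2, add) operand of the pattern
  %fma = call <4 x float> @llvm.fma.v4f32(<4 x float> %s1, <4 x float> %s2, <4 x float> %dst)
  ; lanewise predicate; becomes the "lt, q1, zr" condition of the vpt block
  %cmp = fcmp olt <4 x float> %s1, zeroinitializer
  ; inactive lanes keep the addend %dst, i.e. the vselect(pred, fma, add)
  ; form the new patterns recognise, so this should now select to
  ;   vpt.f32 lt, q1, zr
  ;   vfmat.f32 q0, q1, q2
  %sel = select <4 x i1> %cmp, <4 x float> %fma, <4 x float> %dst
  ret <4 x float> %sel
}

declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)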