Review notes (text before the first "Index:" header is ignored by patch(1)):

* The line structure of the diff has been restored. Alignment whitespace
  inside context lines was reconstructed, so apply with whitespace-insensitive
  matching (e.g. `patch -l`) or regenerate the diff against the tree.

* The SVEInstrFormats.td hunk has been dropped. It added
      (sdnode (SVEAllActive), $Op1, (vselect $Pg, $Op2, (SVEDup0)))
  to a fragment list that is parameterised over `sdnode`, so it also matched
  fmul: the inactive lanes of `fmul a, select(pg, b, 0)` are `a * 0.0`, whereas
  `fmul z0, p0/m, z0, z1` leaves them equal to `a`. For fadd the fold is only
  valid under nsz (-0.0 + +0.0 == +0.0); only fsub is safe unconditionally.
  The fadd/fsub/fmul CHECK updates in sve-masked-fp-arith.ll that depended on
  that fragment are dropped with it.
  TODO(review): reintroduce the fadd (nsz only) and fsub folds through
  per-operation fragments; that touches definitions not shown in this diff.

* fmla/fmls formation is now gated on fast-math flags: fusing the multiply
  into the add/sub changes rounding, so `contract` is required, and the fadd
  form additionally needs `nsz` for the reason above. The unflagged functions
  in sve-masked-fp-arith.ll therefore keep their current output and that file
  is left untouched; flagged coverage is added in a new test file.
  NOTE(review): the helper fragments assume the fast-math flags of the IR
  fadd/fsub are carried over to the AArch64fadd_p/AArch64fsub_p node - confirm.
  The CHECK lines in the new test are the expected output; regenerate them
  with utils/update_llc_test_checks.py before committing.

Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -308,6 +308,26 @@
   return N->hasOneUse();
 }]>;
 
+def AArch64fmul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
+                                   (AArch64fmul_p node:$pred, node:$src1, node:$src2), [{
+  return N->hasOneUse();
+}]>;
+
+// fadd that may be fused with the multiply feeding it and whose zero sign is
+// not significant: `a + (+0.0)` turns a == -0.0 into +0.0, so folding a
+// zero-filled select into a merging fmla needs nsz as well as contract.
+def AArch64fadd_p_contract_nsz : PatFrag<(ops node:$pred, node:$src1, node:$src2),
+                                         (AArch64fadd_p node:$pred, node:$src1, node:$src2), [{
+  return N->getFlags().hasAllowContract() && N->getFlags().hasNoSignedZeros();
+}]>;
+
+// fsub that may be fused with the multiply feeding it. `a - (+0.0)` is a for
+// every a, so there is no nsz requirement here.
+def AArch64fsub_p_contract : PatFrag<(ops node:$pred, node:$src1, node:$src2),
+                                     (AArch64fsub_p node:$pred, node:$src1, node:$src2), [{
+  return N->getFlags().hasAllowContract();
+}]>;
+
 def AArch64fabd_p : PatFrag<(ops node:$pg, node:$op1, node:$op2),
                             (AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)>;
 
@@ -355,6 +375,14 @@
                               (sub node:$op1, (AArch64mul_p_oneuse node:$pred, node:$op2, node:$op3)),
                               // sub(a, select(mask, mul(b, c), splat(0))) -> mls(a, mask, b, c)
                               (sub node:$op1, (vselect node:$pred, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3), (SVEDup0)))]>;
+def AArch64fmla_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
+                              [(int_aarch64_sve_fmla node:$pred, node:$op1, node:$op2, node:$op3),
+                               // fadd contract nsz (a, select(mask, fmul(b, c), splat(0))) -> fmla(a, mask, b, c)
+                               (AArch64fadd_p_contract_nsz (SVEAllActive), node:$op1, (vselect node:$pred, (AArch64fmul_p_oneuse (SVEAllActive), node:$op2, node:$op3), (SVEDup0)))]>;
+def AArch64fmls_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
+                              [(int_aarch64_sve_fmls node:$pred, node:$op1, node:$op2, node:$op3),
+                               // fsub contract (a, select(mask, fmul(b, c), splat(0))) -> fmls(a, mask, b, c)
+                               (AArch64fsub_p_contract (SVEAllActive), node:$op1, (vselect node:$pred, (AArch64fmul_p_oneuse (SVEAllActive), node:$op2, node:$op3), (SVEDup0)))]>;
 
 let Predicates = [HasSVE] in {
   defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
@@ -592,8 +620,8 @@
   defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>;
   defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla", int_aarch64_sve_fcmla>;
 
-  defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla", "FMLA_ZPZZZ", int_aarch64_sve_fmla, "FMAD_ZPmZZ">;
-  defm FMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b01, "fmls", "FMLS_ZPZZZ", int_aarch64_sve_fmls, "FMSB_ZPmZZ">;
+  defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla", "FMLA_ZPZZZ", AArch64fmla_m1, "FMAD_ZPmZZ">;
+  defm FMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b01, "fmls", "FMLS_ZPZZZ", AArch64fmls_m1, "FMSB_ZPmZZ">;
   defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla", "FNMLA_ZPZZZ", int_aarch64_sve_fnmla, "FNMAD_ZPmZZ">;
   defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls", "FNMLS_ZPZZZ", int_aarch64_sve_fnmls, "FNMSB_ZPmZZ">;
 
Index: llvm/test/CodeGen/AArch64/sve-masked-fp-fma-contract.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-fp-fma-contract.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; fadd contract nsz (a, select(mask, fmul(b, c), 0)) -> fmla
+;
+
+define <vscale x 8 x half> @masked_fmla_contract_nsz_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: masked_fmla_contract_nsz_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %fmul = fmul contract <vscale x 8 x half> %b, %c
+  %sel = select <vscale x 8 x i1> %mask, <vscale x 8 x half> %fmul, <vscale x 8 x half> zeroinitializer
+  %fadd = fadd contract nsz <vscale x 8 x half> %a, %sel
+  ret <vscale x 8 x half> %fadd
+}
+
+define <vscale x 4 x float> @masked_fmla_contract_nsz_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: masked_fmla_contract_nsz_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %fmul = fmul contract <vscale x 4 x float> %b, %c
+  %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x float> %fmul, <vscale x 4 x float> zeroinitializer
+  %fadd = fadd contract nsz <vscale x 4 x float> %a, %sel
+  ret <vscale x 4 x float> %fadd
+}
+
+define <vscale x 2 x double> @masked_fmla_contract_nsz_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_fmla_contract_nsz_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %fmul = fmul contract <vscale x 2 x double> %b, %c
+  %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x double> %fmul, <vscale x 2 x double> zeroinitializer
+  %fadd = fadd contract nsz <vscale x 2 x double> %a, %sel
+  ret <vscale x 2 x double> %fadd
+}
+
+;
+; fsub contract (a, select(mask, fmul(b, c), 0)) -> fmls
+;
+
+define <vscale x 8 x half> @masked_fmls_contract_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: masked_fmls_contract_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %fmul = fmul contract <vscale x 8 x half> %b, %c
+  %sel = select <vscale x 8 x i1> %mask, <vscale x 8 x half> %fmul, <vscale x 8 x half> zeroinitializer
+  %fsub = fsub contract <vscale x 8 x half> %a, %sel
+  ret <vscale x 8 x half> %fsub
+}
+
+define <vscale x 4 x float> @masked_fmls_contract_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: masked_fmls_contract_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmls z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %fmul = fmul contract <vscale x 4 x float> %b, %c
+  %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x float> %fmul, <vscale x 4 x float> zeroinitializer
+  %fsub = fsub contract <vscale x 4 x float> %a, %sel
+  ret <vscale x 4 x float> %fsub
+}
+
+define <vscale x 2 x double> @masked_fmls_contract_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_fmls_contract_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmls z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %fmul = fmul contract <vscale x 2 x double> %b, %c
+  %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x double> %fmul, <vscale x 2 x double> zeroinitializer
+  %fsub = fsub contract <vscale x 2 x double> %a, %sel
+  ret <vscale x 2 x double> %fsub
+}