[AArch64][SVE] Fold add/sub of a zero-masked multiply into predicated MLA/MLS.

NOTE(review): this patch text was reconstructed from a whitespace-collapsed copy
in which '<...>' tokens had been stripped. The `<Instruction>` cast type and the
`<vscale x N x iM>` IR types below were restored from the surrounding names
(NAME # _B/_H/_S/_D, @masked_*_nxvNiM) -- confirm against the tree before applying.

Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2978,6 +2978,17 @@
             (!cast<Instruction>(NAME # _S) $pred, $Op1, $Op2, $Op3)>;
   def : Pat<(outerop nxv2i64:$Op1, (mulop nxv2i1:$pred, nxv2i64:$Op2, nxv2i64:$Op3)),
             (!cast<Instruction>(NAME # _D) $pred, $Op1, $Op2, $Op3)>;
+
+  // The multiply is all-active and the select zeroes its inactive lanes, so outerop leaves $Op1 unchanged there.
+  // e.g. add(a, select(mask, mul(b, c), splat(0))) -> mla(a, mask, b, c)
+  def : Pat<(outerop nxv16i8:$Op1, (vselect nxv16i1:$pred, (mulop (nxv16i1 (AArch64ptrue 31)), nxv16i8:$Op2, nxv16i8:$Op3), (SVEDup0))),
+            (!cast<Instruction>(NAME # _B) $pred, $Op1, $Op2, $Op3)>;
+  def : Pat<(outerop nxv8i16:$Op1, (vselect nxv8i1:$pred, (mulop (nxv8i1 (AArch64ptrue 31)), nxv8i16:$Op2, nxv8i16:$Op3), (SVEDup0))),
+            (!cast<Instruction>(NAME # _H) $pred, $Op1, $Op2, $Op3)>;
+  def : Pat<(outerop nxv4i32:$Op1, (vselect nxv4i1:$pred, (mulop (nxv4i1 (AArch64ptrue 31)), nxv4i32:$Op2, nxv4i32:$Op3), (SVEDup0))),
+            (!cast<Instruction>(NAME # _S) $pred, $Op1, $Op2, $Op3)>;
+  def : Pat<(outerop nxv2i64:$Op1, (vselect nxv2i1:$pred, (mulop (nxv2i1 (AArch64ptrue 31)), nxv2i64:$Op2, nxv2i64:$Op3), (SVEDup0))),
+            (!cast<Instruction>(NAME # _D) $pred, $Op1, $Op2, $Op3)>;
 }
 
 //===----------------------------------------------------------------------===//
Index: llvm/test/CodeGen/AArch64/sve-masked-int-arith.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-masked-int-arith.ll
+++ llvm/test/CodeGen/AArch64/sve-masked-int-arith.ll
@@ -96,9 +96,7 @@
 define <vscale x 16 x i8> @masked_mla_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: masked_mla_nxv16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    mul z1.b, p1/m, z1.b, z1.b
-; CHECK-NEXT:    add z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    mla z0.b, p0/m, z1.b, z1.b
 ; CHECK-NEXT:    ret
   %mul = mul nsw <vscale x 16 x i8> %b, %b
   %sel = select <vscale x 16 x i1> %mask, <vscale x 16 x i8> %mul, <vscale x 16 x i8> zeroinitializer
@@ -109,9 +107,7 @@
 define <vscale x 8 x i16> @masked_mla_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: masked_mla_nxv8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    mul z1.h, p1/m, z1.h, z1.h
-; CHECK-NEXT:    add z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    mla z0.h, p0/m, z1.h, z1.h
 ; CHECK-NEXT:    ret
   %mul = mul nsw <vscale x 8 x i16> %b, %b
   %sel = select <vscale x 8 x i1> %mask, <vscale x 8 x i16> %mul, <vscale x 8 x i16> zeroinitializer
@@ -122,9 +118,7 @@
 define <vscale x 4 x i32> @masked_mla_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: masked_mla_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    mul z1.s, p1/m, z1.s, z1.s
-; CHECK-NEXT:    add z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    mla z0.s, p0/m, z1.s, z1.s
 ; CHECK-NEXT:    ret
   %mul = mul nsw <vscale x 4 x i32> %b, %b
   %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x i32> %mul, <vscale x 4 x i32> zeroinitializer
@@ -135,9 +129,7 @@
 define <vscale x 2 x i64> @masked_mla_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_mla_nxv2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    mul z1.d, p1/m, z1.d, z1.d
-; CHECK-NEXT:    add z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    mla z0.d, p0/m, z1.d, z1.d
 ; CHECK-NEXT:    ret
   %mul = mul nsw <vscale x 2 x i64> %b, %b
   %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x i64> %mul, <vscale x 2 x i64> zeroinitializer
@@ -152,9 +144,7 @@
 define <vscale x 16 x i8> @masked_mls_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: masked_mls_nxv16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    mul z1.b, p1/m, z1.b, z1.b
-; CHECK-NEXT:    sub z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    mls z0.b, p0/m, z1.b, z1.b
 ; CHECK-NEXT:    ret
   %mul = mul nsw <vscale x 16 x i8> %b, %b
   %sel = select <vscale x 16 x i1> %mask, <vscale x 16 x i8> %mul, <vscale x 16 x i8> zeroinitializer
@@ -165,9 +155,7 @@
 define <vscale x 8 x i16> @masked_mls_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: masked_mls_nxv8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    mul z1.h, p1/m, z1.h, z1.h
-; CHECK-NEXT:    sub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    mls z0.h, p0/m, z1.h, z1.h
 ; CHECK-NEXT:    ret
   %mul = mul nsw <vscale x 8 x i16> %b, %b
   %sel = select <vscale x 8 x i1> %mask, <vscale x 8 x i16> %mul, <vscale x 8 x i16> zeroinitializer
@@ -178,9 +166,7 @@
 define <vscale x 4 x i32> @masked_mls_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: masked_mls_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    mul z1.s, p1/m, z1.s, z1.s
-; CHECK-NEXT:    sub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    mls z0.s, p0/m, z1.s, z1.s
 ; CHECK-NEXT:    ret
   %mul = mul nsw <vscale x 4 x i32> %b, %b
   %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x i32> %mul, <vscale x 4 x i32> zeroinitializer
@@ -191,9 +177,7 @@
 define <vscale x 2 x i64> @masked_mls_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_mls_nxv2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    mul z1.d, p1/m, z1.d, z1.d
-; CHECK-NEXT:    sub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    mls z0.d, p0/m, z1.d, z1.d
 ; CHECK-NEXT:    ret
   %mul = mul nsw <vscale x 2 x i64> %b, %b
   %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x i64> %mul, <vscale x 2 x i64> zeroinitializer