Index: llvm/test/CodeGen/AArch64/sve-int-arith.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-int-arith.ll +++ llvm/test/CodeGen/AArch64/sve-int-arith.ll @@ -337,8 +337,10 @@ ret %res } -define @mla_i8( %a, %b, %c) { -; CHECK-LABEL: mla_i8: +; Next four cases should generate mad instruction once pseudo instructions are emitted for MLA/MAD + +define @mla_i8_test1( %a, %b, %c) { +; CHECK-LABEL: mla_i8_test1: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: mla z2.b, p0/m, z0.b, z1.b @@ -349,6 +351,104 @@ ret %res } +define @mla_i16_test1( %a, %b, %c) { +; CHECK-LABEL: mla_i16_test1: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mla z2.h, p0/m, z0.h, z1.h +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %prod = mul %a, %b + %res = add %c, %prod + ret %res +} + +define @mla_i32_test1( %a, %b, %c) { +; CHECK-LABEL: mla_i32_test1: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mla z2.s, p0/m, z0.s, z1.s +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %prod = mul %a, %b + %res = add %c, %prod + ret %res +} + +define @mla_i64_test1( %a, %b, %c) { +; CHECK-LABEL: mla_i64_test1: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mla z2.d, p0/m, z0.d, z1.d +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %prod = mul %a, %b + %res = add %c, %prod + ret %res +} + +; Next four cases should generate mla instruction once pseudo instructions are emitted for MLA/MAD + +define @mla_i8_test2( %a, %b, %c) { +; CHECK-LABEL: mla_i8_test2: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mla z2.b, p0/m, z0.b, z1.b +; CHECK-NEXT: add z0.b, z0.b, z1.b +; CHECK-NEXT: add z0.b, z2.b, z0.b +; CHECK-NEXT: ret + %r0 = mul %a, %b + %r1 = add %c, %r0 + %r2 = add %r1, %a + %r3 = add %r2, %b + ret %r3 +} + +define @mla_i16_test2( %a, %b, %c) { +; CHECK-LABEL: mla_i16_test2: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mla z2.h, p0/m, z0.h, z1.h +; CHECK-NEXT: 
add z0.h, z0.h, z1.h +; CHECK-NEXT: add z0.h, z2.h, z0.h +; CHECK-NEXT: ret + %r0 = mul %a, %b + %r1 = add %c, %r0 + %r2 = add %r1, %a + %r3 = add %r2, %b + ret %r3 +} + +define @mla_i32_test2( %a, %b, %c) { +; CHECK-LABEL: mla_i32_test2: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mla z2.s, p0/m, z0.s, z1.s +; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: add z0.s, z2.s, z0.s +; CHECK-NEXT: ret + %r0 = mul %a, %b + %r1 = add %c, %r0 + %r2 = add %r1, %a + %r3 = add %r2, %b + ret %r3 +} + +define @mla_i64_test2( %a, %b, %c) { +; CHECK-LABEL: mla_i64_test2: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mla z2.d, p0/m, z0.d, z1.d +; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: add z0.d, z2.d, z0.d +; CHECK-NEXT: ret + %r0 = mul %a, %b + %r1 = add %c, %r0 + %r2 = add %r1, %a + %r3 = add %r2, %b + ret %r3 +} + define @mla_i8_multiuse( %a, %b, %c, * %p) { ; CHECK-LABEL: mla_i8_multiuse: ; CHECK: // %bb.0: @@ -363,8 +463,10 @@ ret %res } -define @mls_i8( %a, %b, %c) { -; CHECK-LABEL: mls_i8: +; Next four cases should generate msb instruction once pseudo instruction is emitted for MLS/MSB + +define @mls_i8_test1( %a, %b, %c) { +; CHECK-LABEL: mls_i8_test1: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: mls z2.b, p0/m, z0.b, z1.b @@ -375,6 +477,326 @@ ret %res } +define @mls_i16_test1( %a, %b, %c) { +; CHECK-LABEL: mls_i16_test1: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mls z2.h, p0/m, z0.h, z1.h +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %prod = mul %a, %b + %res = sub %c, %prod + ret %res +} + +define @mls_i32_test1( %a, %b, %c) { +; CHECK-LABEL: mls_i32_test1: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mls z2.s, p0/m, z0.s, z1.s +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %prod = mul %a, %b + %res = sub %c, %prod + ret %res +} + +define @mls_i64_test1( %a, %b, %c) { +; CHECK-LABEL: mls_i64_test1: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mls z2.d, p0/m, 
z0.d, z1.d +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %prod = mul %a, %b + %res = sub %c, %prod + ret %res +} + +; Next four cases should generate mls instruction once pseudo instruction is emitted for MLS/MSB + +define @mls_i8_test2( %a, %b, %c) { +; CHECK-LABEL: mls_i8_test2: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mls z2.b, p0/m, z0.b, z1.b +; CHECK-NEXT: sub z0.b, z2.b, z0.b +; CHECK-NEXT: sub z0.b, z0.b, z1.b +; CHECK-NEXT: ret + %r0 = mul %a, %b + %r1 = sub %c, %r0 + %r2 = sub %r1, %a + %r3 = sub %r2, %b + ret %r3 +} + +define @mls_i16_test2( %a, %b, %c) { +; CHECK-LABEL: mls_i16_test2: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mls z2.h, p0/m, z0.h, z1.h +; CHECK-NEXT: sub z0.h, z2.h, z0.h +; CHECK-NEXT: sub z0.h, z0.h, z1.h +; CHECK-NEXT: ret + %r0 = mul %a, %b + %r1 = sub %c, %r0 + %r2 = sub %r1, %a + %r3 = sub %r2, %b + ret %r3 +} + +define @mls_i32_test2( %a, %b, %c) { +; CHECK-LABEL: mls_i32_test2: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mls z2.s, p0/m, z0.s, z1.s +; CHECK-NEXT: sub z0.s, z2.s, z0.s +; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %r0 = mul %a, %b + %r1 = sub %c, %r0 + %r2 = sub %r1, %a + %r3 = sub %r2, %b + ret %r3 +} + +define @mls_i64_test2( %a, %b, %c) { +; CHECK-LABEL: mls_i64_test2: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mls z2.d, p0/m, z0.d, z1.d +; CHECK-NEXT: sub z0.d, z2.d, z0.d +; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %r0 = mul %a, %b + %r1 = sub %c, %r0 + %r2 = sub %r1, %a + %r3 = sub %r2, %b + ret %r3 +} + +; Test cases below have one of the add/sub operands as constant splat + + define @muladd_i64_positiveAddend( %a, %b) +; CHECK-LABEL: muladd_i64_positiveAddend: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.d, #0xffffffff +; CHECK-NEXT: mla z2.d, p0/m, z0.d, z1.d +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret +{ + %1 = mul %a, %b + %2 = add %1, shufflevector ( insertelement ( poison, 
i64 4294967295, i64 0), poison, zeroinitializer) + ret %2 +} + +define @muladd_i64_negativeAddend( %a, %b) +; CHECK-LABEL: muladd_i64_negativeAddend: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.d, #0xffffffff00000001 +; CHECK-NEXT: mla z2.d, p0/m, z0.d, z1.d +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret +{ + %1 = mul %a, %b + %2 = add %1, shufflevector ( insertelement ( poison, i64 -4294967295, i64 0), poison, zeroinitializer) + ret %2 +} + + +define @muladd_i32_positiveAddend( %a, %b) +; CHECK-LABEL: muladd_i32_positiveAddend: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z2.s, #0x10000 +; CHECK-NEXT: mla z2.s, p0/m, z0.s, z1.s +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret +{ + %1 = mul %a, %b + %2 = add %1, shufflevector ( insertelement ( poison, i32 65536, i32 0), poison, zeroinitializer) + ret %2 +} + +define @muladd_i32_negativeAddend( %a, %b) +; CHECK-LABEL: muladd_i32_negativeAddend: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z2.s, #0xffff0000 +; CHECK-NEXT: mla z2.s, p0/m, z0.s, z1.s +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret +{ + %1 = mul %a, %b + %2 = add %1, shufflevector ( insertelement ( poison, i32 -65536, i32 0), poison, zeroinitializer) + ret %2 +} + +define @muladd_i16_positiveAddend( %a, %b) +; CHECK-LABEL: muladd_i16_positiveAddend: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: add z0.h, z0.h, #255 // =0xff +; CHECK-NEXT: ret +{ + %1 = mul %a, %b + %2 = add %1, shufflevector ( insertelement ( poison, i16 255, i16 0), poison, zeroinitializer) + ret %2 +} + +define @muladd_i16_negativeAddend( %a, %b) +; CHECK-LABEL: muladd_i16_negativeAddend: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z2.h, #-255 // =0xffffffffffffff01 +; CHECK-NEXT: mla z2.h, p0/m, z0.h, z1.h +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret +{ + %1 = mul %a, %b + %2 = add %1, shufflevector ( insertelement ( poison, i16 -255, 
i16 0), poison, zeroinitializer) + ret %2 +} + +define @muladd_i8_positiveAddend( %a, %b) +; CHECK-LABEL: muladd_i8_positiveAddend: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: add z0.b, z0.b, #15 // =0xf +; CHECK-NEXT: ret +{ + %1 = mul %a, %b + %2 = add %1, shufflevector ( insertelement ( poison, i8 15, i8 0), poison, zeroinitializer) + ret %2 +} + +define @muladd_i8_negativeAddend( %a, %b) +; CHECK-LABEL: muladd_i8_negativeAddend: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: add z0.b, z0.b, #241 // =0xf1 +; CHECK-NEXT: ret +{ + %1 = mul %a, %b + %2 = add %1, shufflevector ( insertelement ( poison, i8 -15, i8 0), poison, zeroinitializer) + ret %2 +} + +define @mulsub_i64_positiveAddend( %a, %b) +; CHECK-LABEL: mulsub_i64_positiveAddend: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: mov z1.d, #0xffffffff +; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: ret +{ + %1 = mul %a, %b + %2 = sub %1, shufflevector ( insertelement ( poison, i64 4294967295, i64 0), poison, zeroinitializer) + ret %2 +} + +define @mulsub_i64_negativeAddend( %a, %b) +; CHECK-LABEL: mulsub_i64_negativeAddend: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: mov z1.d, #0xffffffff00000001 +; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: ret +{ + %1 = mul %a, %b + %2 = sub %1, shufflevector ( insertelement ( poison, i64 -4294967295, i64 0), poison, zeroinitializer) + ret %2 +} + + +define @mulsub_i32_positiveAddend( %a, %b) +; CHECK-LABEL: mulsub_i32_positiveAddend: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #0x10000 +; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: ret +{ + %1 = mul %a, %b + %2 = sub %1, shufflevector ( insertelement ( poison, i32 65536, i32 0), poison, zeroinitializer) + ret %2 +} + 
+define @mulsub_i32_negativeAddend( %a, %b) +; CHECK-LABEL: mulsub_i32_negativeAddend: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #0xffff0000 +; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: ret +{ + %1 = mul %a, %b + %2 = sub %1, shufflevector ( insertelement ( poison, i32 -65536, i32 0), poison, zeroinitializer) + ret %2 +} + +define @mulsub_i16_positiveAddend( %a, %b) +; CHECK-LABEL: mulsub_i16_positiveAddend: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: sub z0.h, z0.h, #255 // =0xff +; CHECK-NEXT: ret +{ + %1 = mul %a, %b + %2 = sub %1, shufflevector ( insertelement ( poison, i16 255, i16 0), poison, zeroinitializer) + ret %2 +} + +define @mulsub_i16_negativeAddend( %a, %b) +; CHECK-LABEL: mulsub_i16_negativeAddend: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: mov z1.h, #-255 // =0xffffffffffffff01 +; CHECK-NEXT: sub z0.h, z0.h, z1.h +; CHECK-NEXT: ret +{ + %1 = mul %a, %b + %2 = sub %1, shufflevector ( insertelement ( poison, i16 -255, i16 0), poison, zeroinitializer) + ret %2 +} + +define @mulsub_i8_positiveAddend( %a, %b) +; CHECK-LABEL: mulsub_i8_positiveAddend: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: sub z0.b, z0.b, #15 // =0xf +; CHECK-NEXT: ret +{ + %1 = mul %a, %b + %2 = sub %1, shufflevector ( insertelement ( poison, i8 15, i8 0), poison, zeroinitializer) + ret %2 +} + +define @mulsub_i8_negativeAddend( %a, %b) +; CHECK-LABEL: mulsub_i8_negativeAddend: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: sub z0.b, z0.b, #241 // =0xf1 +; CHECK-NEXT: ret +{ + %1 = mul %a, %b + %2 = sub %1, shufflevector ( insertelement ( poison, i8 -15, i8 0), poison, zeroinitializer) + ret %2 +} + declare @llvm.sadd.sat.nxv16i8(, ) declare @llvm.sadd.sat.nxv8i16(, ) declare 
@llvm.sadd.sat.nxv4i32(, )