diff --git a/llvm/test/CodeGen/AArch64/neon-saba.ll b/llvm/test/CodeGen/AArch64/neon-saba.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-saba.ll
@@ -0,0 +1,146 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+; SABA from ADD(ABS(SUB NSW))
+
+define <4 x i32> @saba_abs_4s(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: saba_abs_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.4s, v1.4s, v2.4s
+; CHECK-NEXT:    ret
+  %sub = sub nsw <4 x i32> %b, %c
+  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+  %add = add <4 x i32> %a, %abs
+  ret <4 x i32> %add
+}
+
+define <2 x i32> @saba_abs_2s(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+; CHECK-LABEL: saba_abs_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.2s, v1.2s, v2.2s
+; CHECK-NEXT:    ret
+  %sub = sub nsw <2 x i32> %b, %c
+  %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
+  %add = add <2 x i32> %a, %abs
+  ret <2 x i32> %add
+}
+
+define <8 x i16> @saba_abs_8h(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+; CHECK-LABEL: saba_abs_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.8h, v1.8h, v2.8h
+; CHECK-NEXT:    ret
+  %sub = sub nsw <8 x i16> %b, %c
+  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
+  %add = add <8 x i16> %a, %abs
+  ret <8 x i16> %add
+}
+
+define <4 x i16> @saba_abs_4h(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+; CHECK-LABEL: saba_abs_4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.4h, v1.4h, v2.4h
+; CHECK-NEXT:    ret
+  %sub = sub nsw <4 x i16> %b, %c
+  %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
+  %add = add <4 x i16> %a, %abs
+  ret <4 x i16> %add
+}
+
+define <16 x i8> @saba_abs_16b(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+; CHECK-LABEL: saba_abs_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+  %sub = sub nsw <16 x i8> %b, %c
+  %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
+  %add = add <16 x i8> %a, %abs
+  ret <16 x i8> %add
+}
+
+define <8 x i8> @saba_abs_8b(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+; CHECK-LABEL: saba_abs_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.8b, v1.8b, v2.8b
+; CHECK-NEXT:    ret
+  %sub = sub nsw <8 x i8> %b, %c
+  %abs = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %sub, i1 true)
+  %add = add <8 x i8> %a, %abs
+  ret <8 x i8> %add
+}
+
+; SABA from ADD(SABD)
+
+define <4 x i32> @saba_sabd_4s(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: saba_sabd_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.4s, v1.4s, v2.4s
+; CHECK-NEXT:    ret
+  %sabd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %b, <4 x i32> %c)
+  %add = add <4 x i32> %sabd, %a
+  ret <4 x i32> %add
+}
+
+define <2 x i32> @saba_sabd_2s(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
+; CHECK-LABEL: saba_sabd_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.2s, v1.2s, v2.2s
+; CHECK-NEXT:    ret
+  %sabd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c)
+  %add = add <2 x i32> %sabd, %a
+  ret <2 x i32> %add
+}
+
+define <8 x i16> @saba_sabd_8h(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
+; CHECK-LABEL: saba_sabd_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.8h, v1.8h, v2.8h
+; CHECK-NEXT:    ret
+  %sabd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %b, <8 x i16> %c)
+  %add = add <8 x i16> %sabd, %a
+  ret <8 x i16> %add
+}
+
+define <4 x i16> @saba_sabd_4h(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
+; CHECK-LABEL: saba_sabd_4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.4h, v1.4h, v2.4h
+; CHECK-NEXT:    ret
+  %sabd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c)
+  %add = add <4 x i16> %sabd, %a
+  ret <4 x i16> %add
+}
+
+define <16 x i8> @saba_sabd_16b(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
+; CHECK-LABEL: saba_sabd_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+  %sabd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %b, <16 x i8> %c)
+  %add = add <16 x i8> %sabd, %a
+  ret <16 x i8> %add
+}
+
+define <8 x i8> @saba_sabd_8b(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
+; CHECK-LABEL: saba_sabd_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba v0.8b, v1.8b, v2.8b
+; CHECK-NEXT:    ret
+  %sabd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c)
+  %add = add <8 x i8> %sabd, %a
+  ret <8 x i8> %add
+}
+
+declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
+declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1)
+declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
+declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1)
+declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
+declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
+
+declare <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>)
+declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>)
+declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>)
+declare <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8>, <16 x i8>)
+declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)
diff --git a/llvm/test/CodeGen/AArch64/sve-saba.ll b/llvm/test/CodeGen/AArch64/sve-saba.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-saba.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64-unknown-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+; SABA from ADD(ABS(SUB NSW)) - the expected code below is separate SUB, ABS and ADD (no SABA)
+
+define <vscale x 2 x i64> @saba_abs_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
+; CHECK-LABEL: saba_abs_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sub z1.d, z1.d, z2.d
+; CHECK-NEXT:    abs z1.d, p0/m, z1.d
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %sub = sub nsw <vscale x 2 x i64> %b, %c
+  %abs = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %sub, i1 true)
+  %add = add <vscale x 2 x i64> %a, %abs
+  ret <vscale x 2 x i64> %add
+}
+
+define <vscale x 4 x i32> @saba_abs_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: saba_abs_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sub z1.s, z1.s, z2.s
+; CHECK-NEXT:    abs z1.s, p0/m, z1.s
+; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %sub = sub nsw <vscale x 4 x i32> %b, %c
+  %abs = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %sub, i1 true)
+  %add = add <vscale x 4 x i32> %a, %abs
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 8 x i16> @saba_abs_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
+; CHECK-LABEL: saba_abs_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    sub z1.h, z1.h, z2.h
+; CHECK-NEXT:    abs z1.h, p0/m, z1.h
+; CHECK-NEXT:    add z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %sub = sub nsw <vscale x 8 x i16> %b, %c
+  %abs = call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %sub, i1 true)
+  %add = add <vscale x 8 x i16> %a, %abs
+  ret <vscale x 8 x i16> %add
+}
+
+define <vscale x 16 x i8> @saba_abs_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
+; CHECK-LABEL: saba_abs_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    sub z1.b, z1.b, z2.b
+; CHECK-NEXT:    abs z1.b, p0/m, z1.b
+; CHECK-NEXT:    add z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %sub = sub nsw <vscale x 16 x i8> %b, %c
+  %abs = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> %sub, i1 true)
+  %add = add <vscale x 16 x i8> %a, %abs
+  ret <vscale x 16 x i8> %add
+}
+
+; SABA from ADD(SABD)
+
+define <vscale x 2 x i64> @saba_sabd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
+; CHECK-LABEL: saba_sabd_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %sabd = call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> %true, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  %add = add <vscale x 2 x i64> %sabd, %a
+  ret <vscale x 2 x i64> %add
+}
+
+define <vscale x 4 x i32> @saba_sabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: saba_sabd_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %sabd = call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> %true, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  %add = add <vscale x 4 x i32> %sabd, %a
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 8 x i16> @saba_sabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
+; CHECK-LABEL: saba_sabd_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %sabd = call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> %true, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  %add = add <vscale x 8 x i16> %sabd, %a
+  ret <vscale x 8 x i16> %add
+}
+
+define <vscale x 16 x i8> @saba_sabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
+; CHECK-LABEL: saba_sabd_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.b, z1.b, z2.b
+; CHECK-NEXT:    ret
+  %true = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %sabd = call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> %true, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  %add = add <vscale x 16 x i8> %sabd, %a
+  ret <vscale x 16 x i8> %add
+}
+
+declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)
+declare <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32>, i1)
+declare <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16>, i1)
+declare <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8>, i1)
+
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)