diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s --mattr=+sve -o - | FileCheck %s
+
+target triple = "aarch64-arm-none-eabi"
+
+; a[i] * b[i] * (11.0 + 3.0.i);
+; %a/%b hold interleaved (re, im) lanes; 11.0 and 3.0 are constant splats.
+define <vscale x 4 x double> @complex_mul_const(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
+; CHECK-LABEL: complex_mul_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z4.d, z2.d, z3.d
+; CHECK-NEXT:    uzp1 z5.d, z0.d, z1.d
+; CHECK-NEXT:    uzp2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uzp2 z1.d, z2.d, z3.d
+; CHECK-NEXT:    fmul z2.d, z4.d, z0.d
+; CHECK-NEXT:    fmla z2.d, p0/m, z1.d, z5.d
+; CHECK-NEXT:    fmul z0.d, z1.d, z0.d
+; CHECK-NEXT:    fmov z1.d, #11.00000000
+; CHECK-NEXT:    fnmls z0.d, p0/m, z4.d, z5.d
+; CHECK-NEXT:    fmov z3.d, #3.00000000
+; CHECK-NEXT:    fmul z4.d, z2.d, z1.d
+; CHECK-NEXT:    fmul z2.d, z2.d, z3.d
+; CHECK-NEXT:    fmla z4.d, p0/m, z0.d, z3.d
+; CHECK-NEXT:    fnmsb z1.d, p0/m, z0.d, z2.d
+; CHECK-NEXT:    zip1 z0.d, z1.d, z4.d
+; CHECK-NEXT:    zip2 z1.d, z1.d, z4.d
+; CHECK-NEXT:    ret
+entry:
+  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
+  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
+  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
+  %strided.vec48 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
+  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec48, 0
+  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec48, 1
+  %4 = fmul fast <vscale x 2 x double> %3, %0
+  %5 = fmul fast <vscale x 2 x double> %2, %1
+  %6 = fadd fast <vscale x 2 x double> %4, %5
+  %7 = fmul fast <vscale x 2 x double> %2, %0
+  %8 = fmul fast <vscale x 2 x double> %3, %1
+  %9 = fsub fast <vscale x 2 x double> %7, %8
+  %10 = fmul fast <vscale x 2 x double> %9, shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 3.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
+  %11 = fmul fast <vscale x 2 x double> %6, shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 1.100000e+01, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
+  %12 = fadd fast <vscale x 2 x double> %10, %11
+  %13 = fmul fast <vscale x 2 x double> %9, shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 1.100000e+01, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
+  %14 = fmul fast <vscale x 2 x double> %6, shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 3.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
+  %15 = fsub fast <vscale x 2 x double> %13, %14
+  %interleaved.vec = tail call <vscale x 4 x double> @llvm.experimental.vector.interleave2.nxv4f64(<vscale x 2 x double> %15, <vscale x 2 x double> %12)
+  ret <vscale x 4 x double> %interleaved.vec
+}
+
+; a[i] * b[i] * c;
+; %a/%b hold interleaved (re, im) lanes; %c = [re, im] is splatted at run time.
+define <vscale x 4 x double> @complex_mul_non_const(<vscale x 4 x double> %a, <vscale x 4 x double> %b, [2 x double] %c) {
+; CHECK-LABEL: complex_mul_non_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 z6.d, z2.d, z3.d
+; CHECK-NEXT:    uzp1 z7.d, z0.d, z1.d
+; CHECK-NEXT:    uzp2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uzp2 z1.d, z2.d, z3.d
+; CHECK-NEXT:    fmul z2.d, z6.d, z0.d
+; CHECK-NEXT:    fmla z2.d, p0/m, z1.d, z7.d
+; CHECK-NEXT:    // kill: def $d4 killed $d4 def $z4
+; CHECK-NEXT:    fmul z0.d, z1.d, z0.d
+; CHECK-NEXT:    mov z4.d, d4
+; CHECK-NEXT:    // kill: def $d5 killed $d5 def $z5
+; CHECK-NEXT:    mov z3.d, d5
+; CHECK-NEXT:    fnmls z0.d, p0/m, z6.d, z7.d
+; CHECK-NEXT:    fmul z1.d, z2.d, z4.d
+; CHECK-NEXT:    fmul z2.d, z2.d, z3.d
+; CHECK-NEXT:    fmla z1.d, p0/m, z0.d, z3.d
+; CHECK-NEXT:    fnmls z2.d, p0/m, z0.d, z4.d
+; CHECK-NEXT:    zip1 z0.d, z2.d, z1.d
+; CHECK-NEXT:    zip2 z1.d, z2.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %c.coerce.fca.0.extract = extractvalue [2 x double] %c, 0
+  %c.coerce.fca.1.extract = extractvalue [2 x double] %c, 1
+  %broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %c.coerce.fca.1.extract, i64 0
+  %broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+  %broadcast.splatinsert49 = insertelement <vscale x 2 x double> poison, double %c.coerce.fca.0.extract, i64 0
+  %broadcast.splat50 = shufflevector <vscale x 2 x double> %broadcast.splatinsert49, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
+  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
+  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
+  %strided.vec48 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
+  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec48, 0
+  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec48, 1
+  %4 = fmul fast <vscale x 2 x double> %3, %0
+  %5 = fmul fast <vscale x 2 x double> %2, %1
+  %6 = fadd fast <vscale x 2 x double> %4, %5
+  %7 = fmul fast <vscale x 2 x double> %2, %0
+  %8 = fmul fast <vscale x 2 x double> %3, %1
+  %9 = fsub fast <vscale x 2 x double> %7, %8
+  %10 = fmul fast <vscale x 2 x double> %9, %broadcast.splat
+  %11 = fmul fast <vscale x 2 x double> %6, %broadcast.splat50
+  %12 = fadd fast <vscale x 2 x double> %10, %11
+  %13 = fmul fast <vscale x 2 x double> %9, %broadcast.splat50
+  %14 = fmul fast <vscale x 2 x double> %6, %broadcast.splat
+  %15 = fsub fast <vscale x 2 x double> %13, %14
+  %interleaved.vec = tail call <vscale x 4 x double> @llvm.experimental.vector.interleave2.nxv4f64(<vscale x 2 x double> %15, <vscale x 2 x double> %12)
+  ret <vscale x 4 x double> %interleaved.vec
+}
+
+declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
+declare <vscale x 4 x double> @llvm.experimental.vector.interleave2.nxv4f64(<vscale x 2 x double>, <vscale x 2 x double>)
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s --mattr=+complxnum -o - | FileCheck %s
+
+target triple = "aarch64-arm-none-eabi"
+
+
+; a[i] * b[i] * (11.0 + 3.0.i);
+; %a/%b hold interleaved (re, im) lanes; 11.0 and 3.0 are constant splats.
+define <4 x double> @complex_mul_const(<4 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: complex_mul_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    zip1 v5.2d, v2.2d, v3.2d
+; CHECK-NEXT:    zip2 v6.2d, v0.2d, v1.2d
+; CHECK-NEXT:    zip2 v2.2d, v2.2d, v3.2d
+; CHECK-NEXT:    zip1 v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    fmov v4.2d, #3.00000000
+; CHECK-NEXT:    fmul v1.2d, v5.2d, v6.2d
+; CHECK-NEXT:    fmul v3.2d, v2.2d, v6.2d
+; CHECK-NEXT:    fmla v1.2d, v0.2d, v2.2d
+; CHECK-NEXT:    fneg v2.2d, v3.2d
+; CHECK-NEXT:    fmov v3.2d, #11.00000000
+; CHECK-NEXT:    fmul v6.2d, v1.2d, v4.2d
+; CHECK-NEXT:    fmla v2.2d, v0.2d, v5.2d
+; CHECK-NEXT:    fmul v1.2d, v1.2d, v3.2d
+; CHECK-NEXT:    fneg v5.2d, v6.2d
+; CHECK-NEXT:    fmla v1.2d, v4.2d, v2.2d
+; CHECK-NEXT:    fmla v5.2d, v3.2d, v2.2d
+; CHECK-NEXT:    zip1 v0.2d, v5.2d, v1.2d
+; CHECK-NEXT:    zip2 v1.2d, v5.2d, v1.2d
+; CHECK-NEXT:    ret
+entry:
+  %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>
+  %strided.vec47 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3>
+  %strided.vec49 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2>
+  %strided.vec50 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3>
+  %0 = fmul fast <2 x double> %strided.vec50, %strided.vec
+  %1 = fmul fast <2 x double> %strided.vec49, %strided.vec47
+  %2 = fadd fast <2 x double> %0, %1
+  %3 = fmul fast <2 x double> %strided.vec49, %strided.vec
+  %4 = fmul fast <2 x double> %strided.vec50, %strided.vec47
+  %5 = fsub fast <2 x double> %3, %4
+  %6 = fmul fast <2 x double> %5, <double 3.000000e+00, double 3.000000e+00>
+  %7 = fmul fast <2 x double> %2, <double 1.100000e+01, double 1.100000e+01>
+  %8 = fadd fast <2 x double> %6, %7
+  %9 = fmul fast <2 x double> %5, <double 1.100000e+01, double 1.100000e+01>
+  %10 = fmul fast <2 x double> %2, <double 3.000000e+00, double 3.000000e+00>
+  %11 = fsub fast <2 x double> %9, %10
+  %interleaved.vec = shufflevector <2 x double> %11, <2 x double> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ret <4 x double> %interleaved.vec
+}
+
+
+; a[i] * b[i] * c;
+; %a/%b hold interleaved (re, im) lanes; %c = [re, im] is splatted at run time.
+define <4 x double> @complex_mul_non_const(<4 x double> %a, <4 x double> %b, [2 x double] %c) {
+; CHECK-LABEL: complex_mul_non_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    zip1 v6.2d, v2.2d, v3.2d
+; CHECK-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-NEXT:    zip2 v7.2d, v0.2d, v1.2d
+; CHECK-NEXT:    zip1 v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    zip2 v1.2d, v2.2d, v3.2d
+; CHECK-NEXT:    fmul v2.2d, v6.2d, v7.2d
+; CHECK-NEXT:    fmul v3.2d, v1.2d, v7.2d
+; CHECK-NEXT:    fmla v2.2d, v0.2d, v1.2d
+; CHECK-NEXT:    fneg v1.2d, v3.2d
+; CHECK-NEXT:    fmul v3.2d, v2.2d, v5.d[0]
+; CHECK-NEXT:    fmul v2.2d, v2.2d, v4.d[0]
+; CHECK-NEXT:    fmla v1.2d, v0.2d, v6.2d
+; CHECK-NEXT:    fneg v3.2d, v3.2d
+; CHECK-NEXT:    fmla v2.2d, v1.2d, v5.d[0]
+; CHECK-NEXT:    fmla v3.2d, v1.2d, v4.d[0]
+; CHECK-NEXT:    zip1 v0.2d, v3.2d, v2.2d
+; CHECK-NEXT:    zip2 v1.2d, v3.2d, v2.2d
+; CHECK-NEXT:    ret
+entry:
+  %c.coerce.fca.1.extract = extractvalue [2 x double] %c, 1
+  %c.coerce.fca.0.extract = extractvalue [2 x double] %c, 0
+  %broadcast.splatinsert = insertelement <2 x double> poison, double %c.coerce.fca.1.extract, i64 0
+  %broadcast.splat = shufflevector <2 x double> %broadcast.splatinsert, <2 x double> poison, <2 x i32> zeroinitializer
+  %broadcast.splatinsert51 = insertelement <2 x double> poison, double %c.coerce.fca.0.extract, i64 0
+  %broadcast.splat52 = shufflevector <2 x double> %broadcast.splatinsert51, <2 x double> poison, <2 x i32> zeroinitializer
+  %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>
+  %strided.vec47 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3>
+  %strided.vec49 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2>
+  %strided.vec50 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3>
+  %0 = fmul fast <2 x double> %strided.vec50, %strided.vec
+  %1 = fmul fast <2 x double> %strided.vec49, %strided.vec47
+  %2 = fadd fast <2 x double> %0, %1
+  %3 = fmul fast <2 x double> %strided.vec49, %strided.vec
+  %4 = fmul fast <2 x double> %strided.vec50, %strided.vec47
+  %5 = fsub fast <2 x double> %3, %4
+  %6 = fmul fast <2 x double> %5, %broadcast.splat
+  %7 = fmul fast <2 x double> %2, %broadcast.splat52
+  %8 = fadd fast <2 x double> %6, %7
+  %9 = fmul fast <2 x double> %5, %broadcast.splat52
+  %10 = fmul fast <2 x double> %2, %broadcast.splat
+  %11 = fsub fast <2 x double> %9, %10
+  %interleaved.vec = shufflevector <2 x double> %11, <2 x double> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ret <4 x double> %interleaved.vec
+}