Please use GitHub pull requests for new patches. Avoid migrating existing patches. See the Phabricator shutdown timeline.
Changeset View
Changeset View
Standalone View
Standalone View
llvm/test/CodeGen/AArch64/complex-arithmetic-f32-mul.ll
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||||
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s | ; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s | ||||
target triple = "aarch64-arm-none-eabi" | target triple = "aarch64-arm-none-eabi" | ||||
; complex_mul_v2f32: multiplies %a by %b where each <2 x float> is split into
; lane 0 (named *.real) and lane 1 (named *.imag). The IR computes
;   %5 = b.real*a.real - a.imag*b.imag
;   %2 = b.real*a.imag + b.imag*a.real
; with `fast` math flags, and returns the two results as <%5, %2>.
; NOTE(review): every line below carries two columns separated by " | "; this
; looks like a side-by-side diff rendering. Presumably the left column is the
; earlier expectation and the right column the updated one - confirm.
; The left CHECK column expects a dup/fmul/fmla/fneg/zip1 sequence; the right
; CHECK column expects a zeroed accumulator (movi) followed by two fcmla
; instructions with rotations #0 and #90, then a move into d0.
define <2 x float> @complex_mul_v2f32(<2 x float> %a, <2 x float> %b) { | define <2 x float> @complex_mul_v2f32(<2 x float> %a, <2 x float> %b) { | ||||
; CHECK-LABEL: complex_mul_v2f32: | ; CHECK-LABEL: complex_mul_v2f32: | ||||
; CHECK: // %bb.0: // %entry | ; CHECK: // %bb.0: // %entry | ||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 | ; CHECK-NEXT: movi d2, #0000000000000000 | ||||
; CHECK-NEXT: dup v2.2s, v1.s[1] | ; CHECK-NEXT: fcmla v2.2s, v0.2s, v1.2s, #0 | ||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 | ; CHECK-NEXT: fcmla v2.2s, v0.2s, v1.2s, #90 | ||||
; CHECK-NEXT: dup v3.2s, v0.s[1] | ; CHECK-NEXT: fmov d0, d2 | ||||
; CHECK-NEXT: fmul v4.2s, v0.2s, v1.s[1] | |||||
; CHECK-NEXT: fmul v2.2s, v2.2s, v0.s[1] | |||||
; CHECK-NEXT: fmla v4.2s, v3.2s, v1.2s | |||||
; CHECK-NEXT: fneg v2.2s, v2.2s | |||||
; CHECK-NEXT: fmla v2.2s, v0.2s, v1.2s | |||||
; CHECK-NEXT: zip1 v0.2s, v2.2s, v4.2s | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
; IR under test: extract lane 0 / lane 1 of each operand, form the four
; products, combine them, and place %5 in result lane 0 and %2 in lane 1.
entry: | entry: | ||||
%a.real = shufflevector <2 x float> %a, <2 x float> poison, <1 x i32> <i32 0> | %a.real = shufflevector <2 x float> %a, <2 x float> poison, <1 x i32> <i32 0> | ||||
%a.imag = shufflevector <2 x float> %a, <2 x float> poison, <1 x i32> <i32 1> | %a.imag = shufflevector <2 x float> %a, <2 x float> poison, <1 x i32> <i32 1> | ||||
%b.real = shufflevector <2 x float> %b, <2 x float> poison, <1 x i32> <i32 0> | %b.real = shufflevector <2 x float> %b, <2 x float> poison, <1 x i32> <i32 0> | ||||
%b.imag = shufflevector <2 x float> %b, <2 x float> poison, <1 x i32> <i32 1> | %b.imag = shufflevector <2 x float> %b, <2 x float> poison, <1 x i32> <i32 1> | ||||
%0 = fmul fast <1 x float> %b.imag, %a.real | %0 = fmul fast <1 x float> %b.imag, %a.real | ||||
%1 = fmul fast <1 x float> %b.real, %a.imag | %1 = fmul fast <1 x float> %b.real, %a.imag | ||||
%2 = fadd fast <1 x float> %1, %0 | %2 = fadd fast <1 x float> %1, %0 | ||||
%3 = fmul fast <1 x float> %b.real, %a.real | %3 = fmul fast <1 x float> %b.real, %a.real | ||||
%4 = fmul fast <1 x float> %a.imag, %b.imag | %4 = fmul fast <1 x float> %a.imag, %b.imag | ||||
%5 = fsub fast <1 x float> %3, %4 | %5 = fsub fast <1 x float> %3, %4 | ||||
%interleaved.vec = shufflevector <1 x float> %5, <1 x float> %2, <2 x i32> <i32 0, i32 1> | %interleaved.vec = shufflevector <1 x float> %5, <1 x float> %2, <2 x i32> <i32 0, i32 1> | ||||
ret <2 x float> %interleaved.vec | ret <2 x float> %interleaved.vec | ||||
} | } | ||||
; complex_mul_v4f32: same computation as a complex multiply on <4 x float>
; operands whose even lanes (0, 2) are extracted as *.real and odd lanes
; (1, 3) as *.imag. The IR computes
;   %5 = b.real*a.real - a.imag*b.imag
;   %2 = b.real*a.imag + b.imag*a.real
; with `fast` math flags, then re-interleaves them with mask <0, 2, 1, 3>,
; i.e. result = <%5[0], %2[0], %5[1], %2[1]>.
; NOTE(review): every line below carries two columns separated by " | "; this
; looks like a side-by-side diff rendering. Presumably the left column is the
; earlier expectation and the right column the updated one - confirm.
; The left CHECK column expects an ext/zip/fmul/fneg/fmla/zip sequence; the
; right CHECK column expects a zeroed accumulator (movi) followed by two fcmla
; instructions on .4s lanes with rotations #0 and #90, then a move into v0.
define <4 x float> @complex_mul_v4f32(<4 x float> %a, <4 x float> %b) { | define <4 x float> @complex_mul_v4f32(<4 x float> %a, <4 x float> %b) { | ||||
; CHECK-LABEL: complex_mul_v4f32: | ; CHECK-LABEL: complex_mul_v4f32: | ||||
; CHECK: // %bb.0: // %entry | ; CHECK: // %bb.0: // %entry | ||||
; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 | ; CHECK-NEXT: movi v2.2d, #0000000000000000 | ||||
; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 | ; CHECK-NEXT: fcmla v2.4s, v0.4s, v1.4s, #0 | ||||
; CHECK-NEXT: zip2 v4.2s, v0.2s, v2.2s | ; CHECK-NEXT: fcmla v2.4s, v0.4s, v1.4s, #90 | ||||
; CHECK-NEXT: zip1 v0.2s, v0.2s, v2.2s | ; CHECK-NEXT: mov v0.16b, v2.16b | ||||
; CHECK-NEXT: zip2 v5.2s, v1.2s, v3.2s | |||||
; CHECK-NEXT: zip1 v1.2s, v1.2s, v3.2s | |||||
; CHECK-NEXT: fmul v2.2s, v4.2s, v5.2s | |||||
; CHECK-NEXT: fmul v3.2s, v5.2s, v0.2s | |||||
; CHECK-NEXT: fneg v2.2s, v2.2s | |||||
; CHECK-NEXT: fmla v3.2s, v4.2s, v1.2s | |||||
; CHECK-NEXT: fmla v2.2s, v0.2s, v1.2s | |||||
; CHECK-NEXT: zip2 v1.2s, v2.2s, v3.2s | |||||
; CHECK-NEXT: zip1 v0.2s, v2.2s, v3.2s | |||||
; CHECK-NEXT: mov v0.d[1], v1.d[0] | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
; IR under test: deinterleave both operands into even/odd lanes, form the four
; products, combine them, and interleave the two results back together.
entry: | entry: | ||||
%a.real = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2> | %a.real = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2> | ||||
%a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3> | %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3> | ||||
%b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2> | %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2> | ||||
%b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3> | %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3> | ||||
%0 = fmul fast <2 x float> %b.imag, %a.real | %0 = fmul fast <2 x float> %b.imag, %a.real | ||||
%1 = fmul fast <2 x float> %b.real, %a.imag | %1 = fmul fast <2 x float> %b.real, %a.imag | ||||
%2 = fadd fast <2 x float> %1, %0 | %2 = fadd fast <2 x float> %1, %0 | ||||
%3 = fmul fast <2 x float> %b.real, %a.real | %3 = fmul fast <2 x float> %b.real, %a.real | ||||
%4 = fmul fast <2 x float> %a.imag, %b.imag | %4 = fmul fast <2 x float> %a.imag, %b.imag | ||||
%5 = fsub fast <2 x float> %3, %4 | %5 = fsub fast <2 x float> %3, %4 | ||||
%interleaved.vec = shufflevector <2 x float> %5, <2 x float> %2, <4 x i32> <i32 0, i32 2, i32 1, i32 3> | %interleaved.vec = shufflevector <2 x float> %5, <2 x float> %2, <4 x i32> <i32 0, i32 2, i32 1, i32 3> | ||||
ret <4 x float> %interleaved.vec | ret <4 x float> %interleaved.vec | ||||
} | } | ||||
; complex_mul_v8f32: the same multiply pattern on <8 x float> operands. Even
; lanes (0, 2, 4, 6) are extracted as *.real and odd lanes (1, 3, 5, 7) as
; *.imag. The IR computes
;   %5 = b.real*a.real - a.imag*b.imag
;   %2 = b.real*a.imag + b.imag*a.real
; with `fast` math flags, then interleaves %5 and %2 lane by lane
; (mask <0, 4, 1, 5, 2, 6, 3, 7>).
; NOTE(review): every line below carries two columns separated by " | "; this
; looks like a side-by-side diff rendering. Presumably the left column is the
; earlier expectation and the right column the updated one - confirm.
; The left CHECK column expects a uzp/fmul/fneg/fmla/zip sequence; the right
; CHECK column expects two zeroed accumulators (v4, v5), each receiving an
; fcmla #0 followed by an fcmla #90, then moves into v0 and v1.
define <8 x float> @complex_mul_v8f32(<8 x float> %a, <8 x float> %b) { | define <8 x float> @complex_mul_v8f32(<8 x float> %a, <8 x float> %b) { | ||||
; CHECK-LABEL: complex_mul_v8f32: | ; CHECK-LABEL: complex_mul_v8f32: | ||||
; CHECK: // %bb.0: // %entry | ; CHECK: // %bb.0: // %entry | ||||
; CHECK-NEXT: uzp2 v4.4s, v2.4s, v3.4s | ; CHECK-NEXT: movi v4.2d, #0000000000000000 | ||||
; CHECK-NEXT: uzp2 v5.4s, v0.4s, v1.4s | ; CHECK-NEXT: movi v5.2d, #0000000000000000 | ||||
; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s | ; CHECK-NEXT: fcmla v4.4s, v0.4s, v2.4s, #0 | ||||
; CHECK-NEXT: uzp1 v2.4s, v2.4s, v3.4s | ; CHECK-NEXT: fcmla v5.4s, v1.4s, v3.4s, #0 | ||||
; CHECK-NEXT: fmul v1.4s, v5.4s, v4.4s | ; CHECK-NEXT: fcmla v4.4s, v0.4s, v2.4s, #90 | ||||
; CHECK-NEXT: fmul v3.4s, v4.4s, v0.4s | ; CHECK-NEXT: fcmla v5.4s, v1.4s, v3.4s, #90 | ||||
; CHECK-NEXT: fneg v1.4s, v1.4s | ; CHECK-NEXT: mov v0.16b, v4.16b | ||||
; CHECK-NEXT: fmla v3.4s, v5.4s, v2.4s | ; CHECK-NEXT: mov v1.16b, v5.16b | ||||
; CHECK-NEXT: fmla v1.4s, v0.4s, v2.4s | |||||
; CHECK-NEXT: zip1 v0.4s, v1.4s, v3.4s | |||||
; CHECK-NEXT: zip2 v1.4s, v1.4s, v3.4s | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
; IR under test: deinterleave both operands into even/odd lanes, form the four
; products, combine them, and interleave the two results back together.
entry: | entry: | ||||
%a.real = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> | %a.real = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> | ||||
%a.imag = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> | %a.imag = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> | ||||
%b.real = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> | %b.real = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> | ||||
%b.imag = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> | %b.imag = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> | ||||
%0 = fmul fast <4 x float> %b.imag, %a.real | %0 = fmul fast <4 x float> %b.imag, %a.real | ||||
%1 = fmul fast <4 x float> %b.real, %a.imag | %1 = fmul fast <4 x float> %b.real, %a.imag | ||||
%2 = fadd fast <4 x float> %1, %0 | %2 = fadd fast <4 x float> %1, %0 | ||||
%3 = fmul fast <4 x float> %b.real, %a.real | %3 = fmul fast <4 x float> %b.real, %a.real | ||||
%4 = fmul fast <4 x float> %a.imag, %b.imag | %4 = fmul fast <4 x float> %a.imag, %b.imag | ||||
%5 = fsub fast <4 x float> %3, %4 | %5 = fsub fast <4 x float> %3, %4 | ||||
%interleaved.vec = shufflevector <4 x float> %5, <4 x float> %2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> | %interleaved.vec = shufflevector <4 x float> %5, <4 x float> %2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> | ||||
ret <8 x float> %interleaved.vec | ret <8 x float> %interleaved.vec | ||||
} | } | ||||
; complex_mul_v16f32: the same multiply pattern on <16 x float> operands. Even
; lanes (0, 2, ..., 14) are extracted as *.real and odd lanes (1, 3, ..., 15)
; as *.imag. The IR computes
;   %5 = b.real*a.real - a.imag*b.imag
;   %2 = b.real*a.imag + b.imag*a.real
; with `fast` math flags, then interleaves %5 and %2 lane by lane
; (mask <0, 8, 1, 9, ..., 7, 15>).
; NOTE(review): every line below carries two columns separated by " | "; this
; looks like a side-by-side diff rendering. Presumably the left column is the
; earlier expectation and the right column the updated one - confirm.
; The left CHECK column expects a uzp/fmul/fneg/fmla/zip sequence; the right
; CHECK column expects four zeroed accumulators (v16-v19), each receiving an
; fcmla #0 followed by an fcmla #90, then moves into v0-v3.
define <16 x float> @complex_mul_v16f32(<16 x float> %a, <16 x float> %b) { | define <16 x float> @complex_mul_v16f32(<16 x float> %a, <16 x float> %b) { | ||||
; CHECK-LABEL: complex_mul_v16f32: | ; CHECK-LABEL: complex_mul_v16f32: | ||||
; CHECK: // %bb.0: // %entry | ; CHECK: // %bb.0: // %entry | ||||
; CHECK-NEXT: uzp2 v16.4s, v4.4s, v5.4s | ; CHECK-NEXT: movi v16.2d, #0000000000000000 | ||||
; CHECK-NEXT: uzp1 v17.4s, v2.4s, v3.4s | ; CHECK-NEXT: movi v17.2d, #0000000000000000 | ||||
; CHECK-NEXT: uzp1 v18.4s, v0.4s, v1.4s | ; CHECK-NEXT: movi v18.2d, #0000000000000000 | ||||
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s | ; CHECK-NEXT: movi v19.2d, #0000000000000000 | ||||
; CHECK-NEXT: uzp2 v1.4s, v2.4s, v3.4s | ; CHECK-NEXT: fcmla v16.4s, v0.4s, v4.4s, #0 | ||||
; CHECK-NEXT: uzp2 v2.4s, v6.4s, v7.4s | ; CHECK-NEXT: fcmla v17.4s, v1.4s, v5.4s, #0 | ||||
; CHECK-NEXT: uzp1 v3.4s, v4.4s, v5.4s | ; CHECK-NEXT: fcmla v18.4s, v2.4s, v6.4s, #0 | ||||
; CHECK-NEXT: fmul v4.4s, v0.4s, v16.4s | ; CHECK-NEXT: fcmla v19.4s, v3.4s, v7.4s, #0 | ||||
; CHECK-NEXT: uzp1 v5.4s, v6.4s, v7.4s | ; CHECK-NEXT: fcmla v16.4s, v0.4s, v4.4s, #90 | ||||
; CHECK-NEXT: fmul v6.4s, v1.4s, v2.4s | ; CHECK-NEXT: fcmla v17.4s, v1.4s, v5.4s, #90 | ||||
; CHECK-NEXT: fmul v7.4s, v16.4s, v18.4s | ; CHECK-NEXT: fcmla v18.4s, v2.4s, v6.4s, #90 | ||||
; CHECK-NEXT: fneg v4.4s, v4.4s | ; CHECK-NEXT: fcmla v19.4s, v3.4s, v7.4s, #90 | ||||
; CHECK-NEXT: fmul v16.4s, v2.4s, v17.4s | ; CHECK-NEXT: mov v0.16b, v16.16b | ||||
; CHECK-NEXT: fneg v6.4s, v6.4s | ; CHECK-NEXT: mov v1.16b, v17.16b | ||||
; CHECK-NEXT: fmla v7.4s, v0.4s, v3.4s | ; CHECK-NEXT: mov v2.16b, v18.16b | ||||
; CHECK-NEXT: fmla v4.4s, v18.4s, v3.4s | ; CHECK-NEXT: mov v3.16b, v19.16b | ||||
; CHECK-NEXT: fmla v16.4s, v1.4s, v5.4s | |||||
; CHECK-NEXT: fmla v6.4s, v17.4s, v5.4s | |||||
; CHECK-NEXT: zip1 v0.4s, v4.4s, v7.4s | |||||
; CHECK-NEXT: zip2 v1.4s, v4.4s, v7.4s | |||||
; CHECK-NEXT: zip1 v2.4s, v6.4s, v16.4s | |||||
; CHECK-NEXT: zip2 v3.4s, v6.4s, v16.4s | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
; IR under test: deinterleave both operands into even/odd lanes, form the four
; products, combine them, and interleave the two results back together.
entry: | entry: | ||||
%a.real = shufflevector <16 x float> %a, <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> | %a.real = shufflevector <16 x float> %a, <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> | ||||
%a.imag = shufflevector <16 x float> %a, <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> | %a.imag = shufflevector <16 x float> %a, <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> | ||||
%b.real = shufflevector <16 x float> %b, <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> | %b.real = shufflevector <16 x float> %b, <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> | ||||
%b.imag = shufflevector <16 x float> %b, <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> | %b.imag = shufflevector <16 x float> %b, <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> | ||||
%0 = fmul fast <8 x float> %b.imag, %a.real | %0 = fmul fast <8 x float> %b.imag, %a.real | ||||
%1 = fmul fast <8 x float> %b.real, %a.imag | %1 = fmul fast <8 x float> %b.real, %a.imag | ||||
%2 = fadd fast <8 x float> %1, %0 | %2 = fadd fast <8 x float> %1, %0 | ||||
%3 = fmul fast <8 x float> %b.real, %a.real | %3 = fmul fast <8 x float> %b.real, %a.real | ||||
%4 = fmul fast <8 x float> %a.imag, %b.imag | %4 = fmul fast <8 x float> %a.imag, %b.imag | ||||
%5 = fsub fast <8 x float> %3, %4 | %5 = fsub fast <8 x float> %3, %4 | ||||
%interleaved.vec = shufflevector <8 x float> %5, <8 x float> %2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> | %interleaved.vec = shufflevector <8 x float> %5, <8 x float> %2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> | ||||
ret <16 x float> %interleaved.vec | ret <16 x float> %interleaved.vec | ||||
} | } | ||||