Please use GitHub pull requests for new patches. Avoid migrating existing patches. See the Phabricator shutdown timeline.
Changeset View
Changeset View
Standalone View
Standalone View
llvm/test/CodeGen/AArch64/complex-arithmetic-f32-add.ll
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||||
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s | ; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s | ||||
target triple = "aarch64-arm-none-eabi" | target triple = "aarch64-arm-none-eabi" | ||||
; complex_add_v2f32: operates on one (real, imag) pair per <2 x float> operand.
; Lane 0 of each input is extracted as *.real and lane 1 as *.imag; the result
; is { b.real - a.imag, b.imag + a.real }, i.e. b + i*a when each lane pair is
; read as a complex number.
; In this side-by-side view the left column's CHECK lines list a
; dup/fsub/fadd/zip1 sequence and the right column's list a single
; `fcadd ..., #90` (presumably the before/after output of the patch under
; review -- confirm against the revision).
define <2 x float> @complex_add_v2f32(<2 x float> %a, <2 x float> %b) { | define <2 x float> @complex_add_v2f32(<2 x float> %a, <2 x float> %b) { | ||||
; CHECK-LABEL: complex_add_v2f32: | ; CHECK-LABEL: complex_add_v2f32: | ||||
; CHECK: // %bb.0: // %entry | ; CHECK: // %bb.0: // %entry | ||||
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 | ; CHECK-NEXT: fcadd v0.2s, v1.2s, v0.2s, #90 | ||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 | |||||
; CHECK-NEXT: dup v2.2s, v0.s[1] | |||||
; CHECK-NEXT: dup v3.2s, v1.s[1] | |||||
; CHECK-NEXT: fsub v1.2s, v1.2s, v2.2s | |||||
; CHECK-NEXT: fadd v0.2s, v3.2s, v0.2s | |||||
; CHECK-NEXT: zip1 v0.2s, v1.2s, v0.2s | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
entry: | entry: | ||||
; Deinterleave: mask <0> selects the real lane, mask <1> the imaginary lane.
%a.real = shufflevector <2 x float> %a, <2 x float> zeroinitializer, <1 x i32> <i32 0> | %a.real = shufflevector <2 x float> %a, <2 x float> zeroinitializer, <1 x i32> <i32 0> | ||||
%a.imag = shufflevector <2 x float> %a, <2 x float> zeroinitializer, <1 x i32> <i32 1> | %a.imag = shufflevector <2 x float> %a, <2 x float> zeroinitializer, <1 x i32> <i32 1> | ||||
%b.real = shufflevector <2 x float> %b, <2 x float> zeroinitializer, <1 x i32> <i32 0> | %b.real = shufflevector <2 x float> %b, <2 x float> zeroinitializer, <1 x i32> <i32 0> | ||||
%b.imag = shufflevector <2 x float> %b, <2 x float> zeroinitializer, <1 x i32> <i32 1> | %b.imag = shufflevector <2 x float> %b, <2 x float> zeroinitializer, <1 x i32> <i32 1> | ||||
; Result real part = b.real - a.imag; result imaginary part = b.imag + a.real.
%0 = fsub fast <1 x float> %b.real, %a.imag | %0 = fsub fast <1 x float> %b.real, %a.imag | ||||
%1 = fadd fast <1 x float> %b.imag, %a.real | %1 = fadd fast <1 x float> %b.imag, %a.real | ||||
; Re-interleave the two single-lane results back into a (real, imag) pair.
%interleaved.vec = shufflevector <1 x float> %0, <1 x float> %1, <2 x i32> <i32 0, i32 1> | %interleaved.vec = shufflevector <1 x float> %0, <1 x float> %1, <2 x i32> <i32 0, i32 1> | ||||
ret <2 x float> %interleaved.vec | ret <2 x float> %interleaved.vec | ||||
} | } | ||||
; complex_add_v4f32: operates on two interleaved (real, imag) pairs per
; <4 x float> operand. Even lanes (0, 2) are extracted as *.real and odd lanes
; (1, 3) as *.imag; the result lanes are { b.real - a.imag, b.imag + a.real }
; re-interleaved, i.e. b + i*a per complex element.
; In this side-by-side view the left column's CHECK lines list an
; ext/zip/fsub/fadd/zip/mov sequence and the right column's list a single
; `fcadd v0.4s, v1.4s, v0.4s, #90` (presumably the before/after output of the
; patch under review -- confirm against the revision).
define <4 x float> @complex_add_v4f32(<4 x float> %a, <4 x float> %b) { | define <4 x float> @complex_add_v4f32(<4 x float> %a, <4 x float> %b) { | ||||
; CHECK-LABEL: complex_add_v4f32: | ; CHECK-LABEL: complex_add_v4f32: | ||||
; CHECK: // %bb.0: // %entry | ; CHECK: // %bb.0: // %entry | ||||
; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 | ; CHECK-NEXT: fcadd v0.4s, v1.4s, v0.4s, #90 | ||||
; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 | |||||
; CHECK-NEXT: zip1 v4.2s, v0.2s, v2.2s | |||||
; CHECK-NEXT: zip2 v0.2s, v0.2s, v2.2s | |||||
; CHECK-NEXT: zip1 v2.2s, v1.2s, v3.2s | |||||
; CHECK-NEXT: zip2 v1.2s, v1.2s, v3.2s | |||||
; CHECK-NEXT: fsub v0.2s, v2.2s, v0.2s | |||||
; CHECK-NEXT: fadd v1.2s, v1.2s, v4.2s | |||||
; CHECK-NEXT: zip2 v2.2s, v0.2s, v1.2s | |||||
; CHECK-NEXT: zip1 v0.2s, v0.2s, v1.2s | |||||
; CHECK-NEXT: mov v0.d[1], v2.d[0] | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
entry: | entry: | ||||
; Deinterleave: even-index masks select real lanes, odd-index masks imaginary lanes.
%a.real = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <2 x i32> <i32 0, i32 2> | %a.real = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <2 x i32> <i32 0, i32 2> | ||||
%a.imag = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <2 x i32> <i32 1, i32 3> | %a.imag = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <2 x i32> <i32 1, i32 3> | ||||
%b.real = shufflevector <4 x float> %b, <4 x float> zeroinitializer, <2 x i32> <i32 0, i32 2> | %b.real = shufflevector <4 x float> %b, <4 x float> zeroinitializer, <2 x i32> <i32 0, i32 2> | ||||
%b.imag = shufflevector <4 x float> %b, <4 x float> zeroinitializer, <2 x i32> <i32 1, i32 3> | %b.imag = shufflevector <4 x float> %b, <4 x float> zeroinitializer, <2 x i32> <i32 1, i32 3> | ||||
; Result real parts = b.real - a.imag; result imaginary parts = b.imag + a.real.
%0 = fsub fast <2 x float> %b.real, %a.imag | %0 = fsub fast <2 x float> %b.real, %a.imag | ||||
%1 = fadd fast <2 x float> %b.imag, %a.real | %1 = fadd fast <2 x float> %b.imag, %a.real | ||||
; Re-interleave: mask <0, 2, 1, 3> alternates lanes of %0 (real) and %1 (imag).
%interleaved.vec = shufflevector <2 x float> %0, <2 x float> %1, <4 x i32> <i32 0, i32 2, i32 1, i32 3> | %interleaved.vec = shufflevector <2 x float> %0, <2 x float> %1, <4 x i32> <i32 0, i32 2, i32 1, i32 3> | ||||
ret <4 x float> %interleaved.vec | ret <4 x float> %interleaved.vec | ||||
} | } | ||||
; complex_add_v8f32: operates on four interleaved (real, imag) pairs per
; <8 x float> operand. Even lanes are extracted as *.real and odd lanes as
; *.imag; the result lanes are { b.real - a.imag, b.imag + a.real }
; re-interleaved, i.e. b + i*a per complex element.
; In this side-by-side view the left column's CHECK lines list a
; uzp1/uzp2/fsub/fadd/zip1/zip2 sequence and the right column's list two
; `fcadd ... .4s ..., #90` instructions, one per 128-bit half of the result
; (presumably the before/after output of the patch under review -- confirm
; against the revision).
define <8 x float> @complex_add_v8f32(<8 x float> %a, <8 x float> %b) { | define <8 x float> @complex_add_v8f32(<8 x float> %a, <8 x float> %b) { | ||||
; CHECK-LABEL: complex_add_v8f32: | ; CHECK-LABEL: complex_add_v8f32: | ||||
; CHECK: // %bb.0: // %entry | ; CHECK: // %bb.0: // %entry | ||||
; CHECK-NEXT: uzp1 v4.4s, v2.4s, v3.4s | ; CHECK-NEXT: fcadd v0.4s, v2.4s, v0.4s, #90 | ||||
; CHECK-NEXT: uzp1 v5.4s, v0.4s, v1.4s | ; CHECK-NEXT: fcadd v1.4s, v3.4s, v1.4s, #90 | ||||
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s | |||||
; CHECK-NEXT: uzp2 v1.4s, v2.4s, v3.4s | |||||
; CHECK-NEXT: fsub v2.4s, v4.4s, v0.4s | |||||
; CHECK-NEXT: fadd v1.4s, v1.4s, v5.4s | |||||
; CHECK-NEXT: zip1 v0.4s, v2.4s, v1.4s | |||||
; CHECK-NEXT: zip2 v1.4s, v2.4s, v1.4s | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
entry: | entry: | ||||
; Deinterleave: even-index masks select real lanes, odd-index masks imaginary lanes.
%a.real = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6> | %a.real = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6> | ||||
%a.imag = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <4 x i32> <i32 1, i32 3, i32 5, i32 7> | %a.imag = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <4 x i32> <i32 1, i32 3, i32 5, i32 7> | ||||
%b.real = shufflevector <8 x float> %b, <8 x float> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6> | %b.real = shufflevector <8 x float> %b, <8 x float> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6> | ||||
%b.imag = shufflevector <8 x float> %b, <8 x float> zeroinitializer, <4 x i32> <i32 1, i32 3, i32 5, i32 7> | %b.imag = shufflevector <8 x float> %b, <8 x float> zeroinitializer, <4 x i32> <i32 1, i32 3, i32 5, i32 7> | ||||
; Result real parts = b.real - a.imag; result imaginary parts = b.imag + a.real.
%0 = fsub fast <4 x float> %b.real, %a.imag | %0 = fsub fast <4 x float> %b.real, %a.imag | ||||
%1 = fadd fast <4 x float> %b.imag, %a.real | %1 = fadd fast <4 x float> %b.imag, %a.real | ||||
; Re-interleave: the mask alternates lane i of %0 (real) with lane i of %1 (imag).
%interleaved.vec = shufflevector <4 x float> %0, <4 x float> %1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> | %interleaved.vec = shufflevector <4 x float> %0, <4 x float> %1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> | ||||
ret <8 x float> %interleaved.vec | ret <8 x float> %interleaved.vec | ||||
} | } | ||||
; complex_add_v16f32: operates on eight interleaved (real, imag) pairs per
; <16 x float> operand. Even lanes are extracted as *.real and odd lanes as
; *.imag; the result lanes are { b.real - a.imag, b.imag + a.real }
; re-interleaved, i.e. b + i*a per complex element.
; In this side-by-side view the left column's CHECK lines list a
; uzp1/uzp2/fsub/fadd/zip1/zip2 sequence and the right column's list four
; `fcadd ... .4s ..., #90` instructions, one per 128-bit quarter of the result
; (presumably the before/after output of the patch under review -- confirm
; against the revision).
define <16 x float> @complex_add_v16f32(<16 x float> %a, <16 x float> %b) { | define <16 x float> @complex_add_v16f32(<16 x float> %a, <16 x float> %b) { | ||||
; CHECK-LABEL: complex_add_v16f32: | ; CHECK-LABEL: complex_add_v16f32: | ||||
; CHECK: // %bb.0: // %entry | ; CHECK: // %bb.0: // %entry | ||||
; CHECK-NEXT: uzp1 v16.4s, v4.4s, v5.4s | ; CHECK-NEXT: fcadd v2.4s, v6.4s, v2.4s, #90 | ||||
; CHECK-NEXT: uzp1 v17.4s, v2.4s, v3.4s | ; CHECK-NEXT: fcadd v0.4s, v4.4s, v0.4s, #90 | ||||
; CHECK-NEXT: uzp1 v18.4s, v0.4s, v1.4s | ; CHECK-NEXT: fcadd v1.4s, v5.4s, v1.4s, #90 | ||||
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s | ; CHECK-NEXT: fcadd v3.4s, v7.4s, v3.4s, #90 | ||||
; CHECK-NEXT: uzp2 v1.4s, v2.4s, v3.4s | |||||
; CHECK-NEXT: uzp2 v2.4s, v4.4s, v5.4s | |||||
; CHECK-NEXT: uzp1 v3.4s, v6.4s, v7.4s | |||||
; CHECK-NEXT: uzp2 v4.4s, v6.4s, v7.4s | |||||
; CHECK-NEXT: fsub v5.4s, v16.4s, v0.4s | |||||
; CHECK-NEXT: fadd v2.4s, v2.4s, v18.4s | |||||
; CHECK-NEXT: fsub v3.4s, v3.4s, v1.4s | |||||
; CHECK-NEXT: fadd v4.4s, v4.4s, v17.4s | |||||
; CHECK-NEXT: zip1 v0.4s, v5.4s, v2.4s | |||||
; CHECK-NEXT: zip2 v1.4s, v5.4s, v2.4s | |||||
; CHECK-NEXT: zip1 v2.4s, v3.4s, v4.4s | |||||
; CHECK-NEXT: zip2 v3.4s, v3.4s, v4.4s | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
entry: | entry: | ||||
; Deinterleave: even-index masks select real lanes, odd-index masks imaginary lanes.
%a.real = shufflevector <16 x float> %a, <16 x float> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> | %a.real = shufflevector <16 x float> %a, <16 x float> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> | ||||
%a.imag = shufflevector <16 x float> %a, <16 x float> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> | %a.imag = shufflevector <16 x float> %a, <16 x float> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> | ||||
%b.real = shufflevector <16 x float> %b, <16 x float> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> | %b.real = shufflevector <16 x float> %b, <16 x float> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> | ||||
%b.imag = shufflevector <16 x float> %b, <16 x float> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> | %b.imag = shufflevector <16 x float> %b, <16 x float> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> | ||||
; Result real parts = b.real - a.imag; result imaginary parts = b.imag + a.real.
%0 = fsub fast <8 x float> %b.real, %a.imag | %0 = fsub fast <8 x float> %b.real, %a.imag | ||||
%1 = fadd fast <8 x float> %b.imag, %a.real | %1 = fadd fast <8 x float> %b.imag, %a.real | ||||
; Re-interleave: the mask alternates lane i of %0 (real) with lane i of %1 (imag).
%interleaved.vec = shufflevector <8 x float> %0, <8 x float> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> | %interleaved.vec = shufflevector <8 x float> %0, <8 x float> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> | ||||
ret <16 x float> %interleaved.vec | ret <16 x float> %interleaved.vec | ||||
} | } | ||||