diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll b/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll --- a/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll @@ -1,13 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast \ -; RUN: < %s -verify-machineinstrs -asm-verbose=false | FileCheck %s +; RUN: < %s -verify-machineinstrs | FileCheck %s define <8x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8x i8> %c, <8x i8> %d ) { ; CHECK-LABEL: test_select_cc_v8i8_i8: -; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0 -; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1 -; CHECK: cmeq [[MASK:v[0-9]+]].8b, v[[LHS]].8b, v[[RHS]].8b -; CHECK: dup [[DUPMASK:v[0-9]+]].8b, [[MASK]].b[0] -; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s2, w1 +; CHECK-NEXT: fmov s3, w0 +; CHECK-NEXT: cmeq v2.8b, v3.8b, v2.8b +; CHECK-NEXT: dup v2.8b, v2.b[0] +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %cmp31 = icmp eq i8 %a, %b %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d ret <8x i8> %e @@ -15,9 +19,13 @@ define <8x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8x i8> %c, <8x i8> %d ) { ; CHECK-LABEL: test_select_cc_v8i8_f32: -; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s -; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0] -; CHECK-NEXT: bsl [[DUPMASK]].8b, v2.8b, v3.8b +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $s0 killed $s0 def $d0 +; CHECK-NEXT: // kill: def $s1 killed $s1 def $d1 +; CHECK-NEXT: fcmeq v0.2s, v0.2s, v1.2s +; CHECK-NEXT: dup v0.2s, v0.s[0] +; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b +; CHECK-NEXT: ret %cmp31 = fcmp oeq float %a, %b %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d ret <8x i8> %e @@ -25,8 +33,10 @@ define <8x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8x i8> %c, <8x i8> %d ) { ; CHECK-LABEL: test_select_cc_v8i8_f64: -; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1 -; CHECK-NEXT: bsl v[[MASK]].8b, v2.8b, v3.8b +; CHECK: // %bb.0: +; CHECK-NEXT: fcmeq d0, d0, d1 +; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b +; CHECK-NEXT: ret %cmp31 = fcmp oeq double %a, %b %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d ret <8x i8> %e @@ -34,11 +44,14 @@ define <16x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16x i8> %c, <16x i8> %d ) { ; CHECK-LABEL: test_select_cc_v16i8_i8: -; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0 -; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1 -; CHECK: cmeq [[MASK:v[0-9]+]].16b, v[[LHS]].16b, v[[RHS]].16b -; CHECK: dup [[DUPMASK:v[0-9]+]].16b, [[MASK]].b[0] -; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s2, w1 +; CHECK-NEXT: fmov s3, w0 +; CHECK-NEXT: cmeq v2.16b, v3.16b, v2.16b +; CHECK-NEXT: dup v2.16b, v2.b[0] +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %cmp31 = icmp eq i8 %a, %b %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d ret <16x i8> %e @@ -46,9 +59,13 @@ define <16x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16x i8> %c, <16x i8> %d ) { ; CHECK-LABEL: test_select_cc_v16i8_f32: -; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s -; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0] -; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1 +; CHECK-NEXT: fcmeq v0.4s, v0.4s, v1.4s +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b +; CHECK-NEXT: ret %cmp31 = fcmp oeq float %a, %b %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d ret <16x i8> %e @@ -56,9 +73,13 @@ define <16x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16x i8> %c, <16x i8> %d ) { ; CHECK-LABEL: test_select_cc_v16i8_f64: -; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d -; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0] -; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: fcmeq v0.2d, v0.2d, v1.2d +; CHECK-NEXT: dup v0.2d, v0.d[0] +; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b +; CHECK-NEXT: ret %cmp31 = fcmp oeq double %a, %b %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d ret <16x i8> %e @@ -66,11 +87,14 @@ define <4x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4x i16> %c, <4x i16> %d ) { ; CHECK-LABEL: test_select_cc_v4i16: -; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0 -; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1 -; CHECK: cmeq [[MASK:v[0-9]+]].4h, v[[LHS]].4h, v[[RHS]].4h -; CHECK: dup [[DUPMASK:v[0-9]+]].4h, [[MASK]].h[0] -; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s2, w1 +; CHECK-NEXT: fmov s3, w0 +; CHECK-NEXT: cmeq v2.4h, v3.4h, v2.4h +; CHECK-NEXT: dup v2.4h, v2.h[0] +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %cmp31 = icmp eq i16 %a, %b %e = select i1 %cmp31, <4x i16> %c, <4x i16> %d ret <4x i16> %e @@ -78,11 +102,14 @@ define <8x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8x i16> %c, <8x i16> %d ) { ; CHECK-LABEL: test_select_cc_v8i16: -; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0 -; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1 -; CHECK: cmeq [[MASK:v[0-9]+]].8h, v[[LHS]].8h, v[[RHS]].8h -; CHECK: dup [[DUPMASK:v[0-9]+]].8h, [[MASK]].h[0] -; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s2, w1 +; CHECK-NEXT: fmov s3, w0 +; CHECK-NEXT: cmeq v2.8h, v3.8h, v2.8h +; CHECK-NEXT: dup v2.8h, v2.h[0] +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %cmp31 = icmp eq i16 %a, %b %e = select i1 %cmp31, <8x i16> %c, <8x i16> %d ret <8x i16> %e @@ -90,11 +117,14 @@ define <2x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2x i32> %c, <2x i32> %d ) { ; CHECK-LABEL: test_select_cc_v2i32: -; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0 -; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1 -; CHECK: cmeq [[MASK:v[0-9]+]].2s, v[[LHS]].2s, v[[RHS]].2s -; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0] -; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s2, w1 +; CHECK-NEXT: fmov s3, w0 +; CHECK-NEXT: cmeq v2.2s, v3.2s, v2.2s +; CHECK-NEXT: dup v2.2s, v2.s[0] +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %cmp31 = icmp eq i32 %a, %b %e = select i1 %cmp31, <2x i32> %c, <2x i32> %d ret <2x i32> %e @@ -102,11 +132,14 @@ define <4x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4x i32> %c, <4x i32> %d ) { ; CHECK-LABEL: test_select_cc_v4i32: -; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0 -; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1 -; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s -; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0] -; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s2, w1 +; CHECK-NEXT: fmov s3, w0 +; CHECK-NEXT: cmeq v2.4s, v3.4s, v2.4s +; CHECK-NEXT: dup v2.4s, v2.s[0] +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %cmp31 = icmp eq i32 %a, %b %e = select i1 %cmp31, <4x i32> %c, <4x i32> %d ret <4x i32> %e @@ -114,10 +147,13 @@ define <1x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1x i64> %c, <1x i64> %d ) { ; CHECK-LABEL: test_select_cc_v1i64: -; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0 -; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1 -; CHECK: cmeq d[[MASK:[0-9]+]], d[[LHS]], d[[RHS]] -; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b +; CHECK: // %bb.0: +; CHECK-NEXT: fmov d2, x1 +; CHECK-NEXT: fmov d3, x0 +; CHECK-NEXT: cmeq d2, d3, d2 +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %cmp31 = icmp eq i64 %a, %b %e = select i1 %cmp31, <1x i64> %c, <1x i64> %d ret <1x i64> %e @@ -125,11 +161,14 @@ define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d ) { ; CHECK-LABEL: test_select_cc_v2i64: -; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0 -; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1 -; CHECK: cmeq [[MASK:v[0-9]+]].2d, v[[LHS]].2d, v[[RHS]].2d -; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0] -; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b +; CHECK: // %bb.0: +; CHECK-NEXT: fmov d2, x1 +; CHECK-NEXT: fmov d3, x0 +; CHECK-NEXT: cmeq v2.2d, v3.2d, v2.2d +; CHECK-NEXT: dup v2.2d, v2.d[0] +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %cmp31 = icmp eq i64 %a, %b %e = select i1 %cmp31, <2x i64> %c, <2x i64> %d ret <2x i64> %e @@ -137,8 +176,12 @@ define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) { ; CHECK-LABEL: test_select_cc_v1f32: -; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s -; CHECK-NEXT: bsl [[MASK]].8b, v2.8b, v3.8b +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $s0 killed $s0 def $d0 +; CHECK-NEXT: // kill: def $s1 killed $s1 def $d1 +; CHECK-NEXT: fcmeq v0.2s, v0.2s, v1.2s +; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b +; CHECK-NEXT: ret %cmp31 = fcmp oeq float %a, %b %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d ret <1 x float> %e @@ -146,9 +189,13 @@ define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d ) { ; CHECK-LABEL: test_select_cc_v2f32: -; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s -; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0] -; CHECK: bsl [[DUPMASK]].8b, v2.8b, v3.8b +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $s0 killed $s0 def $d0 +; CHECK-NEXT: // kill: def $s1 killed $s1 def $d1 +; CHECK-NEXT: fcmeq v0.2s, v0.2s, v1.2s +; CHECK-NEXT: dup v0.2s, v0.s[0] +; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b +; CHECK-NEXT: ret %cmp31 = fcmp oeq float %a, %b %e = select i1 %cmp31, <2 x float> %c, <2 x float> %d ret <2 x float> %e @@ -156,9 +203,13 @@ define <4x float> @test_select_cc_v4f32(float %a, float %b, <4x float> %c, <4x float> %d ) { ; CHECK-LABEL: test_select_cc_v4f32: -; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s -; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0] -; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1 +; CHECK-NEXT: fcmeq v0.4s, v0.4s, v1.4s +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b +; CHECK-NEXT: ret %cmp31 = fcmp oeq float %a, %b %e = select i1 %cmp31, <4x float> %c, <4x float> %d ret <4x float> %e @@ -166,11 +217,14 @@ define <4x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4x float> %c, <4x float> %d ) { ; CHECK-LABEL: test_select_cc_v4f32_icmp: -; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0 -; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1 -; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s -; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0] -; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s2, w1 +; CHECK-NEXT: fmov s3, w0 +; CHECK-NEXT: cmeq v2.4s, v3.4s, v2.4s +; CHECK-NEXT: dup v2.4s, v2.s[0] +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %cmp31 = icmp eq i32 %a, %b %e = select i1 %cmp31, <4x float> %c, <4x float> %d ret <4x float> %e @@ -178,8 +232,10 @@ define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d ) { ; CHECK-LABEL: test_select_cc_v1f64: -; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1 -; CHECK: bsl v[[MASK]].8b, v2.8b, v3.8b +; CHECK: // %bb.0: +; CHECK-NEXT: fcmeq d0, d0, d1 +; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b +; CHECK-NEXT: ret %cmp31 = fcmp oeq double %a, %b %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d ret <1 x double> %e @@ -187,10 +243,13 @@ define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c, <1 x double> %d ) { ; CHECK-LABEL: test_select_cc_v1f64_icmp: -; CHECK-DAG: fmov [[LHS:d[0-9]+]], x0 -; CHECK-DAG: fmov [[RHS:d[0-9]+]], x1 -; CHECK: cmeq d[[MASK:[0-9]+]], [[LHS]], [[RHS]] -; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b +; CHECK: // %bb.0: +; CHECK-NEXT: fmov d2, x1 +; CHECK-NEXT: fmov d3, x0 +; CHECK-NEXT: cmeq d2, d3, d2 +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %cmp31 = icmp eq i64 %a, %b %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d ret <1 x double> %e @@ -198,9 +257,13 @@ define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d ) { ; CHECK-LABEL: test_select_cc_v2f64: -; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d -; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0] -; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: fcmeq v0.2d, v0.2d, v1.2d +; CHECK-NEXT: dup v0.2d, v0.d[0] +; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b +; CHECK-NEXT: ret %cmp31 = fcmp oeq double %a, %b %e = select i1 %cmp31, <2 x double> %c, <2 x double> %d ret <2 x double> %e @@ -211,11 +274,13 @@ ; Part of PR21549. define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_select_cc_v2i32_icmpi1: -; CHECK: tst w0, #0x1 -; CHECK: csetm [[MASK:w[0-9]+]], ne -; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]] -; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b -; CHECK: mov v0.16b, [[DUPMASK]].16b +; CHECK: // %bb.0: +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: dup v2.2s, w8 +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %cmp = icmp ne i1 %cc, 0 %e = select i1 %cmp, <2 x i32> %a, <2 x i32> %b ret <2 x i32> %e @@ -224,11 +289,14 @@ ; Also make sure we support irregular/non-power-of-2 types such as v3f32. define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b, float %c1, float %c2) #0 { ; CHECK-LABEL: test_select_cc_v3f32_fcmp_f32: -; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].4s, v2.4s, v3.4s -; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0] -; CHECK-NEXT: bsl [[DUPMASK:v[0-9]+]].16b, v0.16b, v1.16b -; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2 +; CHECK-NEXT: // kill: def $s3 killed $s3 def $q3 +; CHECK-NEXT: fcmeq v2.4s, v2.4s, v3.4s +; CHECK-NEXT: dup v2.4s, v2.s[0] +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %cc = fcmp oeq float %c1, %c2 %r = select i1 %cc, <3 x float> %a, <3 x float> %b ret <3 x float> %r @@ -236,11 +304,14 @@ define <3 x float> @test_select_cc_v3f32_fcmp_f64(<3 x float> %a, <3 x float> %b, double %c1, double %c2) #0 { ; CHECK-LABEL: test_select_cc_v3f32_fcmp_f64: -; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].2d, v2.2d, v3.2d -; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0] -; CHECK-NEXT: bsl [[DUPMASK:v[0-9]+]].16b, v0.16b, v1.16b -; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: fcmeq v2.2d, v2.2d, v3.2d +; CHECK-NEXT: dup v2.2d, v2.d[0] +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret %cc = fcmp oeq double %c1, %c2 %r = select i1 %cc, <3 x float> %a, <3 x float> %b ret <3 x float> %r diff --git a/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll --- a/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll @@ -1,9 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s ; float16x4_t select_64(float16x4_t a, float16x4_t b, uint16x4_t c) { return vbsl_u16(c, a, b); } define <4 x half> @select_64(<4 x half> %a, <4 x half> %b, <4 x i16> %c) #0 { ; CHECK-LABEL: select_64: -; CHECK: bsl +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret entry: %0 = bitcast <4 x half> %a to <4 x i16> %1 = bitcast <4 x half> %b to <4 x i16> @@ -18,7 +22,10 @@ ; float16x8_t select_128(float16x8_t a, float16x8_t b, uint16x8_t c) { return vbslq_u16(c, a, b); } define <8 x half> @select_128(<8 x half> %a, <8 x half> %b, <8 x i16> %c) #0 { ; CHECK-LABEL: select_128: -; CHECK: bsl +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret entry: %0 = bitcast <8 x half> %a to <8 x i16> %1 = bitcast <8 x half> %b to <8 x i16> @@ -35,7 +42,12 @@ ; } define <4 x half> @lane_64_64(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-LABEL: lane_64_64: -; CHECK: mov v{{[0-9]+}}.h +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mov v0.h[1], v1.h[2] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %0 = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> ret <4 x half> %0 @@ -46,7 +58,10 @@ ; } define <8 x half> @lane_128_64(<8 x half> %a, <4 x half> %b) #0 { ; CHECK-LABEL: lane_128_64: -; CHECK: mov v{{[0-9]+}}.h +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mov v0.h[1], v1.h[2] +; CHECK-NEXT: ret entry: %0 = bitcast <4 x half> %b to <4 x i16> %vget_lane = extractelement <4 x i16> %0, i32 2 @@ -61,7 +76,11 @@ ; } define <4 x half> @lane_64_128(<4 x half> %a, <8 x half> %b) #0 { ; CHECK-LABEL: lane_64_128: -; CHECK: mov v{{[0-9]+}}.h +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov v0.h[3], v1.h[5] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %0 = bitcast <8 x half> %b to <8 x i16> %vgetq_lane = extractelement <8 x i16> %0, i32 5 @@ -76,7 +95,9 @@ ; } define <8 x half> @lane_128_128(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-LABEL: lane_128_128: -; CHECK: mov v{{[0-9]+}}.h +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov v0.h[3], v1.h[5] +; CHECK-NEXT: ret entry: %0 = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> ret <8 x half> %0 @@ -87,7 +108,9 @@ ; } define <4 x half> @ext_64(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-LABEL: ext_64: -; CHECK: ext +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ext v0.8b, v0.8b, v1.8b, #6 +; CHECK-NEXT: ret entry: %0 = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> ret <4 x half> %0 @@ -98,7 +121,9 @@ ; } define <8 x half> @ext_128(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-LABEL: ext_128: -; CHECK: ext +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #6 +; CHECK-NEXT: ret entry: %0 = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> ret <8 x half> %0 @@ -108,9 +133,11 @@ ; return vrev32_s16(a); ; } define <4 x half> @rev32_64(<4 x half> %a) #0 { -entry: ; CHECK-LABEL: rev32_64: -; CHECK: rev32 +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rev32 v0.4h, v0.4h +; CHECK-NEXT: ret +entry: %0 = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> ret <4 x half> %0 } @@ -119,9 +146,11 @@ ; return vrev64_s16(a); ; } define <4 x half> @rev64_64(<4 x half> %a) #0 { -entry: ; CHECK-LABEL: rev64_64: -; CHECK: rev64 +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rev64 v0.4h, v0.4h +; CHECK-NEXT: ret +entry: %0 = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> ret <4 x half> %0 } @@ -130,9 +159,11 @@ ; return vrev32q_s16(a); ; } define <8 x half> @rev32_128(<8 x half> %a) #0 { -entry: ; CHECK-LABEL: rev32_128: -; CHECK: rev32 +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rev32 v0.8h, v0.8h +; CHECK-NEXT: ret +entry: %0 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> ret <8 x half> %0 } @@ -141,9 +172,11 @@ ; return vrev64q_s16(a); ; } define <8 x half> @rev64_128(<8 x half> %a) #0 { -entry: ; CHECK-LABEL: rev64_128: -; CHECK: rev64 +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rev64 v0.8h, v0.8h +; CHECK-NEXT: ret +entry: %0 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> ret <8 x half> %0 } @@ -151,7 +184,9 @@ ; float16x4_t create_64(long long a) { return vcreate_f16(a); } define <4 x half> @create_64(i64 %a) #0 { ; CHECK-LABEL: create_64: -; CHECK: fmov +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ret entry: %0 = bitcast i64 %a to <4 x half> ret <4 x half> %0 @@ -160,7 +195,10 @@ ; float16x4_t dup_64(__fp16 a) { return vdup_n_f16(a); } define <4 x half> @dup_64(half %a) #0 { ; CHECK-LABEL: dup_64: -; CHECK: dup +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-NEXT: dup v0.4h, v0.h[0] +; CHECK-NEXT: ret entry: %vecinit = insertelement <4 x half> undef, half %a, i32 0 %vecinit1 = insertelement <4 x half> %vecinit, half %a, i32 1 @@ -171,9 +209,12 @@ ; float16x8_t dup_128(__fp16 a) { return vdupq_n_f16(a); } define <8 x half> @dup_128(half %a) #0 { -entry: ; CHECK-LABEL: dup_128: -; CHECK: dup +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: ret +entry: %vecinit = insertelement <8 x half> undef, half %a, i32 0 %vecinit1 = insertelement <8 x half> %vecinit, half %a, i32 1 %vecinit2 = insertelement <8 x half> %vecinit1, half %a, i32 2 @@ -187,45 +228,59 @@ ; float16x4_t dup_lane_64(float16x4_t a) { return vdup_lane_f16(a, 2); } define <4 x half> @dup_lane_64(<4 x half> %a) #0 { -entry: ; CHECK-LABEL: dup_lane_64: -; CHECK: dup +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: dup v0.4h, v0.h[2] +; CHECK-NEXT: ret +entry: %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> ret <4 x half> %shuffle } ; float16x8_t dup_lane_128(float16x4_t a) { return vdupq_lane_f16(a, 2); } define <8 x half> @dup_lane_128(<4 x half> %a) #0 { -entry: ; CHECK-LABEL: dup_lane_128: -; CHECK: dup +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: dup v0.8h, v0.h[2] +; CHECK-NEXT: ret +entry: %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> ret <8 x half> %shuffle } ; float16x4_t dup_laneq_64(float16x8_t a) { return vdup_laneq_f16(a, 2); } define <4 x half> @dup_laneq_64(<8 x half> %a) #0 { -entry: ; CHECK-LABEL: dup_laneq_64: -; CHECK: dup +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: dup v0.4h, v0.h[2] +; CHECK-NEXT: ret +entry: %shuffle = shufflevector <8 x half> %a, <8 x half> undef, <4 x i32> ret <4 x half> %shuffle } ; float16x8_t dup_laneq_128(float16x8_t a) { return vdupq_laneq_f16(a, 2); } define <8 x half> @dup_laneq_128(<8 x half> %a) #0 { -entry: ; CHECK-LABEL: dup_laneq_128: -; CHECK: dup +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: dup v0.8h, v0.h[2] +; CHECK-NEXT: ret +entry: %shuffle = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> ret <8 x half> %shuffle } ; float16x8_t vcombine(float16x4_t a, float16x4_t b) { return vcombine_f16(a, b); } define <8 x half> @vcombine(<4 x half> %a, <4 x half> %b) #0 { -entry: ; CHECK-LABEL: vcombine: -; CHECK: mov v0.d[1], v1.d[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ret +entry: %shuffle.i = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> ret <8 x half> %shuffle.i } @@ -233,7 +288,10 @@ ; float16x4_t get_high(float16x8_t a) { return vget_high_f16(a); } define <4 x half> @get_high(<8 x half> %a) #0 { ; CHECK-LABEL: get_high: -; CHECK: ext +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <4 x i32> ret <4 x half> %shuffle.i @@ -243,7 +301,9 @@ ; float16x4_t get_low(float16x8_t a) { return vget_low_f16(a); } define <4 x half> @get_low(<8 x half> %a) #0 { ; CHECK-LABEL: get_low: -; CHECK-NOT: ext +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <4 x i32> ret <4 x half> %shuffle.i @@ -252,8 +312,13 @@ ; float16x4_t set_lane_64(float16x4_t a, __fp16 b) { return vset_lane_f16(b, a, 2); } define <4 x half> @set_lane_64(<4 x half> %a, half %b) #0 { ; CHECK-LABEL: set_lane_64: -; CHECK: fmov -; CHECK: mov v{{[0-9]+}}.h +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $h1 killed $h1 def $s1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %0 = bitcast half %b to i16 %1 = bitcast <4 x half> %a to <4 x i16> @@ -266,8 +331,11 @@ ; float16x8_t set_lane_128(float16x8_t a, __fp16 b) { return vsetq_lane_f16(b, a, 2); } define <8 x half> @set_lane_128(<8 x half> %a, half %b) #0 { ; CHECK-LABEL: set_lane_128: -; CHECK: fmov -; CHECK: mov v{{[0-9]+}}.h +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $h1 killed $h1 def $s1 +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: ret entry: %0 = bitcast half %b to i16 %1 = bitcast <8 x half> %a to <8 x i16> @@ -279,8 +347,12 @@ ; __fp16 get_lane_64(float16x4_t a) { return vget_lane_f16(a, 2); } define half @get_lane_64(<4 x half> %a) #0 { ; CHECK-LABEL: get_lane_64: -; CHECK: umov -; CHECK: fmov +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: umov w8, v0.h[2] +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-NEXT: ret entry: %0 = bitcast <4 x half> %a to <4 x i16> %vget_lane = extractelement <4 x i16> %0, i32 2 @@ -291,8 +363,11 @@ ; __fp16 get_lane_128(float16x8_t a) { return vgetq_lane_f16(a, 2); } define half @get_lane_128(<8 x half> %a) #0 { ; CHECK-LABEL: get_lane_128: -; CHECK: umov -; CHECK: fmov +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umov w8, v0.h[2] +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-NEXT: ret entry: %0 = bitcast <8 x half> %a to <8 x i16> %vgetq_lane = extractelement <8 x i16> %0, i32 2