diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -483,15 +483,6 @@ defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", AArch64fmaxv_p>; defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", AArch64fminv_p>; - // Use more efficient NEON instructions to extract elements within the NEON - // part (first 128bits) of an SVE register. - def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)), - (f16 (EXTRACT_SUBREG (v8f16 (EXTRACT_SUBREG ZPR:$Zs, zsub)), hsub))>; - def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)), - (f32 (EXTRACT_SUBREG (v4f32 (EXTRACT_SUBREG ZPR:$Zs, zsub)), ssub))>; - def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)), - (f64 (EXTRACT_SUBREG (v2f64 (EXTRACT_SUBREG ZPR:$Zs, zsub)), dsub))>; - // Splat immediate (unpredicated) defm DUP_ZI : sve_int_dup_imm<"dup">; defm FDUP_ZI : sve_int_dup_fpimm<"fdup">; @@ -2162,6 +2153,28 @@ (DUP_ZR_D $index)), $src)>; + // Extract element from vector with scalar index + def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), GPR64:$index)), + (LASTB_RPZ_B (WHILELS_PXX_B XZR, GPR64:$index), ZPR:$vec)>; + def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), GPR64:$index)), + (LASTB_RPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>; + def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), GPR64:$index)), + (LASTB_RPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>; + def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), GPR64:$index)), + (LASTB_RPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; + def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), GPR64:$index)), + (LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>; + def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), GPR64:$index)), + (LASTB_VPZ_H (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>; + def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), GPR64:$index)), + (LASTB_VPZ_H (WHILELS_PXX_D XZR, 
GPR64:$index), ZPR:$vec)>; + def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)), + (LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>; + def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), GPR64:$index)), + (LASTB_VPZ_S (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; + def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), GPR64:$index)), + (LASTB_VPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; + // Extract element from vector with immediate index def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)), (EXTRACT_SUBREG (DUP_ZZI_B ZPR:$vec, sve_elm_idx_extdup_b:$index), ssub)>; @@ -2173,34 +2186,52 @@ (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>; def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)), (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>; + def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)), + (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), hsub)>; + def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)), + (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), hsub)>; def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)), (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>; + def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)), + (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), ssub)>; def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)), (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>; - // Extract element from vector with scalar index - def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), GPR64:$index)), - (LASTB_RPZ_B (WHILELS_PXX_B XZR,
GPR64:$index), - ZPR:$vec)>; - def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), GPR64:$index)), - (LASTB_RPZ_H (WHILELS_PXX_H XZR, GPR64:$index), - ZPR:$vec)>; - def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), GPR64:$index)), - (LASTB_RPZ_S (WHILELS_PXX_S XZR, GPR64:$index), - ZPR:$vec)>; - def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), GPR64:$index)), - (LASTB_RPZ_D (WHILELS_PXX_D XZR, GPR64:$index), - ZPR:$vec)>; + // Extract element from vector with immediate index that's within the bottom 128-bits. + let AddedComplexity = 1 in { + def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)), + (i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>; + def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)), + (i32 (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>; + def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)), + (i32 (UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>; + def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), VectorIndexD:$index)), + (i64 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index))>; + } - def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), GPR64:$index)), - (LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index), - ZPR:$vec)>; - def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)), - (LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index), - ZPR:$vec)>; - def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), GPR64:$index)), - (LASTB_VPZ_D (WHILELS_PXX_D XZR, GPR64:$index), - ZPR:$vec)>; + // Extract first element from vector. 
+ let AddedComplexity = 2 in { + def : Pat<(vector_extract (nxv16i8 ZPR:$Zs), (i64 0)), + (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; + def : Pat<(vector_extract (nxv8i16 ZPR:$Zs), (i64 0)), + (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; + def : Pat<(vector_extract (nxv4i32 ZPR:$Zs), (i64 0)), + (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; + def : Pat<(vector_extract (nxv2i64 ZPR:$Zs), (i64 0)), + (i64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>; + def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)), + (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>; + def : Pat<(vector_extract (nxv4f16 ZPR:$Zs), (i64 0)), + (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>; + def : Pat<(vector_extract (nxv2f16 ZPR:$Zs), (i64 0)), + (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>; + def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)), + (f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; + def : Pat<(vector_extract (nxv2f32 ZPR:$Zs), (i64 0)), + (f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; + def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)), + (f64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>; + } } let Predicates = [HasSVE, HasMatMulInt8] in { diff --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll --- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll @@ -1,69 +1,125 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s +; RUN: llc < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t ; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning -define i8 @test_lane0_16xi8( %a) { +target triple = "aarch64-unknown-linux-gnu" + +define i8 @test_lane0_16xi8( %a) #0 { ; CHECK-LABEL: test_lane0_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.b, b0 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %b = extractelement %a, i32 0 ret i8 %b } -define i16 @test_lane0_8xi16( %a) { +define i8 @test_lane15_16xi8( %a) #0 { +; CHECK-LABEL: test_lane15_16xi8: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w0, v0.b[15] +; CHECK-NEXT: ret + %b = extractelement %a, i32 15 + ret i8 %b +} + +define i8 @test_lane16_16xi8( %a) #0 { +; CHECK-LABEL: test_lane16_16xi8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.b, z0.b[16] +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 16 + ret i8 %b +} + +define i16 @test_lane0_8xi16( %a) #0 { ; CHECK-LABEL: test_lane0_8xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.h, h0 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %b = extractelement %a, i32 0 ret i16 %b } -define i32 @test_lane0_4xi32( %a) { +define i16 @test_lane7_8xi16( %a) #0 { +; CHECK-LABEL: test_lane7_8xi16: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w0, v0.h[7] +; CHECK-NEXT: ret + %b = extractelement %a, i32 7 + ret i16 %b +} + +define i16 @test_lane8_8xi16( %a) #0 { +; CHECK-LABEL: test_lane8_8xi16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, z0.h[8] +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 8 + ret i16 %b +} + +define i32 @test_lane0_4xi32( %a) #0 { ; CHECK-LABEL: test_lane0_4xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %b = extractelement %a, i32 0 ret i32 %b } -define i64 @test_lane0_2xi64( %a) { +define i32 @test_lane3_4xi32( %a) #0 { +; CHECK-LABEL: test_lane3_4xi32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, v0.s[3] +; CHECK-NEXT: ret + %b = extractelement %a, i32 3 + ret i32 %b +} + +define i32 @test_lane4_4xi32( %a) #0 { +; CHECK-LABEL: test_lane4_4xi32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, z0.s[4] +; 
CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 4 + ret i32 %b +} + +define i64 @test_lane0_2xi64( %a) #0 { ; CHECK-LABEL: test_lane0_2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, d0 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %b = extractelement %a, i32 0 ret i64 %b } -define double @test_lane0_2xf64( %a) { -; CHECK-LABEL: test_lane0_2xf64: +define i64 @test_lane1_2xi64( %a) #0 { +; CHECK-LABEL: test_lane1_2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: mov x0, v0.d[1] ; CHECK-NEXT: ret - %b = extractelement %a, i32 0 - ret double %b + %b = extractelement %a, i32 1 + ret i64 %b } -define float @test_lane0_4xf32( %a) { -; CHECK-LABEL: test_lane0_4xf32: +define i64 @test_lane2_2xi64( %a) #0 { +; CHECK-LABEL: test_lane2_2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 +; CHECK-NEXT: mov z0.d, z0.d[2] +; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret - %b = extractelement %a, i32 0 - ret float %b + %b = extractelement %a, i32 2 + ret i64 %b } -define half @test_lane0_8xf16( %a) { +define half @test_lane0_8xf16( %a) #0 { ; CHECK-LABEL: test_lane0_8xf16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 @@ -72,7 +128,172 @@ ret half %b } -define i8 @test_lanex_16xi8( %a, i32 %x) { +define half @test_lane7_8xf16( %a) #0 { +; CHECK-LABEL: test_lane7_8xf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, z0.h[7] +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 7 + ret half %b +} + +define half @test_lane8_8xf16( %a) #0 { +; CHECK-LABEL: test_lane8_8xf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, z0.h[8] +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 8 + ret half %b +} + +define half @test_lane0_4xf16( %a) #0 { +; CHECK-LABEL: test_lane0_4xf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 +; CHECK-NEXT: ret + %b = 
extractelement %a, i32 0 + ret half %b +} + +define half @test_lane3_4xf16( %a) #0 { +; CHECK-LABEL: test_lane3_4xf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, z0.s[3] +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 3 + ret half %b +} + +define half @test_lane4_4xf16( %a) #0 { +; CHECK-LABEL: test_lane4_4xf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, z0.s[4] +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 4 + ret half %b +} + +define half @test_lane0_2xf16( %a) #0 { +; CHECK-LABEL: test_lane0_2xf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 0 + ret half %b +} + +define half @test_lane1_2xf16( %a) #0 { +; CHECK-LABEL: test_lane1_2xf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, z0.d[1] +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 1 + ret half %b +} + +define half @test_lane2_2xf16( %a) #0 { +; CHECK-LABEL: test_lane2_2xf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, z0.d[2] +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 2 + ret half %b +} + +define float @test_lane0_4xf32( %a) #0 { +; CHECK-LABEL: test_lane0_4xf32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 0 + ret float %b +} + +define float @test_lane3_4xf32( %a) #0 { +; CHECK-LABEL: test_lane3_4xf32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, z0.s[3] +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 3 + ret float %b +} + +define float @test_lane4_4xf32( %a) #0 { +; CHECK-LABEL: test_lane4_4xf32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, z0.s[4] +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 4 + ret float %b +} + 
+define float @test_lane0_2xf32( %a) #0 { +; CHECK-LABEL: test_lane0_2xf32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 0 + ret float %b +} + +define float @test_lane1_2xf32( %a) #0 { +; CHECK-LABEL: test_lane1_2xf32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, z0.d[1] +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 1 + ret float %b +} + +define float @test_lane2_2xf32( %a) #0 { +; CHECK-LABEL: test_lane2_2xf32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, z0.d[2] +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 2 + ret float %b +} + +define double @test_lane0_2xf64( %a) #0 { +; CHECK-LABEL: test_lane0_2xf64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 0 + ret double %b +} + +define double @test_lane1_2xf64( %a) #0 { +; CHECK-LABEL: test_lane1_2xf64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, z0.d[1] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 1 + ret double %b +} + +define double @test_lane2_2xf64( %a) #0 { +; CHECK-LABEL: test_lane2_2xf64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, z0.d[2] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %b = extractelement %a, i32 2 + ret double %b +} + +define i8 @test_lanex_16xi8( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_16xi8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -84,7 +305,7 @@ ret i8 %b } -define i16 @test_lanex_8xi16( %a, i32 %x) { +define i16 @test_lanex_8xi16( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_8xi16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -96,7 +317,7 @@ ret i16 %b } -define i32 @test_lanex_4xi32( %a, i32 %x) { +define i32 @test_lanex_4xi32( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_4xi32: ; 
CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -108,7 +329,7 @@ ret i32 %b } -define i64 @test_lanex_2xi64( %a, i32 %x) { +define i64 @test_lanex_2xi64( %a, i32 %x) #0 { ; CHECK-LABEL: test_lanex_2xi64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 @@ -120,77 +341,89 @@ ret i64 %b } -define double @test_lanex_2xf64( %a, i32 %x) { -; CHECK-LABEL: test_lanex_2xf64: +define half @test_lanex_8xf16( %a, i32 %x) #0 { +; CHECK-LABEL: test_lanex_8xf16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtw x8, w0 -; CHECK-NEXT: whilels p0.d, xzr, x8 -; CHECK-NEXT: lastb d0, p0, z0.d +; CHECK-NEXT: whilels p0.h, xzr, x8 +; CHECK-NEXT: lastb h0, p0, z0.h ; CHECK-NEXT: ret - %b = extractelement %a, i32 %x - ret double %b + %b = extractelement %a, i32 %x + ret half %b } -define float @test_lanex_4xf32( %a, i32 %x) { -; CHECK-LABEL: test_lanex_4xf32: +define half @test_lanex_4xf16( %a, i32 %x) #0 { +; CHECK-LABEL: test_lanex_4xf16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtw x8, w0 ; CHECK-NEXT: whilels p0.s, xzr, x8 -; CHECK-NEXT: lastb s0, p0, z0.s +; CHECK-NEXT: lastb h0, p0, z0.h ; CHECK-NEXT: ret - %b = extractelement %a, i32 %x - ret float %b + %b = extractelement %a, i32 %x + ret half %b } -define half @test_lanex_8xf16( %a, i32 %x) { -; CHECK-LABEL: test_lanex_8xf16: +define half @test_lanex_2xf16( %a, i32 %x) #0 { +; CHECK-LABEL: test_lanex_2xf16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtw x8, w0 -; CHECK-NEXT: whilels p0.h, xzr, x8 +; CHECK-NEXT: whilels p0.d, xzr, x8 ; CHECK-NEXT: lastb h0, p0, z0.h ; CHECK-NEXT: ret - %b = extractelement %a, i32 %x + %b = extractelement %a, i32 %x ret half %b } -; Deliberately choose an index that is out-of-bounds -define i8 @test_lane64_16xi8( %a) { -; CHECK-LABEL: test_lane64_16xi8: +define float @test_lanex_4xf32( %a, i32 %x) #0 { +; CHECK-LABEL: test_lanex_4xf32: 
; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64 -; CHECK-NEXT: whilels p0.b, xzr, x8 -; CHECK-NEXT: lastb w0, p0, z0.b +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: whilels p0.s, xzr, x8 +; CHECK-NEXT: lastb s0, p0, z0.s ; CHECK-NEXT: ret - %b = extractelement %a, i32 64 - ret i8 %b + %b = extractelement %a, i32 %x + ret float %b +} + +define float @test_lanex_2xf32( %a, i32 %x) #0 { +; CHECK-LABEL: test_lanex_2xf32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: whilels p0.d, xzr, x8 +; CHECK-NEXT: lastb s0, p0, z0.s +; CHECK-NEXT: ret + %b = extractelement %a, i32 %x + ret float %b } -define double @test_lane9_2xf64( %a) { -; CHECK-LABEL: test_lane9_2xf64: +define double @test_lanex_2xf64( %a, i32 %x) #0 { +; CHECK-LABEL: test_lanex_2xf64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #9 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtw x8, w0 ; CHECK-NEXT: whilels p0.d, xzr, x8 ; CHECK-NEXT: lastb d0, p0, z0.d ; CHECK-NEXT: ret - %b = extractelement %a, i32 9 + %b = extractelement %a, i32 %x ret double %b } ; Deliberately choose an index that is undefined -define i32 @test_lane64_4xi32( %a) { -; CHECK-LABEL: test_lane64_4xi32: +define i32 @test_undef_lane_4xi32( %a) #0 { +; CHECK-LABEL: test_undef_lane_4xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %b = extractelement %a, i32 undef ret i32 %b } -define i8 @extract_of_insert_undef_16xi8(i8 %a) { +define i8 @extract_of_insert_undef_16xi8(i8 %a) #0 { ; CHECK-LABEL: extract_of_insert_undef_16xi8: ; CHECK: // %bb.0: ; CHECK-NEXT: ret @@ -199,7 +432,7 @@ ret i8 %c } -define i8 @extract0_of_insert0_16xi8( %a, i8 %b) { +define i8 @extract0_of_insert0_16xi8( %a, i8 %b) #0 { ; CHECK-LABEL: extract0_of_insert0_16xi8: ; CHECK: // %bb.0: ; CHECK-NEXT: ret @@ -208,7 +441,7 @@ ret i8 %d } -define i8 @extract64_of_insert64_16xi8( %a, i8 %b) { +define 
i8 @extract64_of_insert64_16xi8( %a, i8 %b) #0 { ; CHECK-LABEL: extract64_of_insert64_16xi8: ; CHECK: // %bb.0: ; CHECK-NEXT: ret @@ -217,18 +450,17 @@ ret i8 %d } -define i8 @extract_of_insert_diff_lanes_16xi8( %a, i8 %b) { +define i8 @extract_of_insert_diff_lanes_16xi8( %a, i8 %b) #0 { ; CHECK-LABEL: extract_of_insert_diff_lanes_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.b, z0.b[3] -; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: umov w0, v0.b[3] ; CHECK-NEXT: ret %c = insertelement %a, i8 %b, i32 0 %d = extractelement %c, i32 3 ret i8 %d } -define i8 @test_lane0_zero_16xi8( %a) { +define i8 @test_lane0_zero_16xi8( %a) #0 { ; CHECK-LABEL: test_lane0_zero_16xi8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w0, wzr @@ -240,7 +472,7 @@ ; The DAG combiner should fold the extract of a splat to give element zero ; of the splat, i.e. %x. If the index is beyond the end of the scalable ; vector the result is undefined anyway. -define i64 @test_lanex_splat_2xi64(i64 %x, i32 %y) { +define i64 @test_lanex_splat_2xi64(i64 %x, i32 %y) #0 { ; CHECK-LABEL: test_lanex_splat_2xi64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret @@ -249,3 +481,5 @@ %c = extractelement %b, i32 %y ret i64 %c } + +attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll --- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll @@ -182,9 +182,8 @@ define @test_insert0_of_extract0_16xi8( %a, %b) { ; CHECK-LABEL: test_insert0_of_extract0_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.b, b1 -; CHECK-NEXT: ptrue p0.b, vl1 ; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: ptrue p0.b, vl1 ; CHECK-NEXT: mov z0.b, p0/m, w8 ; CHECK-NEXT: ret %c = extractelement %b, i32 0 @@ -212,14 +211,13 @@ define @test_insert3_of_extract1_16xi8( %a, %b) { ; CHECK-LABEL: test_insert3_of_extract1_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.b, z1.b[1] -; CHECK-NEXT: mov w8, #3 -; CHECK-NEXT: index z2.b, 
#0, #1 -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: mov z1.b, w8 +; CHECK-NEXT: mov w9, #3 +; CHECK-NEXT: umov w8, v1.b[1] +; CHECK-NEXT: index z1.b, #0, #1 +; CHECK-NEXT: mov z2.b, w9 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z1.b -; CHECK-NEXT: mov z0.b, p0/m, w9 +; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b +; CHECK-NEXT: mov z0.b, p0/m, w8 ; CHECK-NEXT: ret %c = extractelement %b, i32 1 %d = insertelement %a, i8 %c, i32 3 diff --git a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll --- a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll @@ -127,8 +127,7 @@ define i16 @promote_extract_4i16( %a) { ; CHECK-LABEL: promote_extract_4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.s, z0.s[1] -; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: mov w0, v0.s[1] ; CHECK-NEXT: ret %ext = extractelement %a, i32 1 ret i16 %ext @@ -137,8 +136,7 @@ define i8 @split_extract_32i8( %a) { ; CHECK-LABEL: split_extract_32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.b, z0.b[3] -; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: umov w0, v0.b[3] ; CHECK-NEXT: ret %ext = extractelement %a, i32 3 ret i8 %ext