diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -944,6 +944,7 @@ // D68877 for more details. for (MVT VT : MVT::integer_scalable_vector_valuetypes()) { if (isTypeLegal(VT)) { + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::MUL, VT, Custom); setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); @@ -975,6 +976,7 @@ for (MVT VT : MVT::fp_scalable_vector_valuetypes()) { if (isTypeLegal(VT)) { + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); @@ -9041,10 +9043,40 @@ // Check for non-constant or out of range lane. EVT VT = Op.getOperand(0).getValueType(); ConstantSDNode *CI = dyn_cast(Op.getOperand(1)); + + if (VT.isScalableVector()) { + // Ignore extracts from predicates and unpacked vectors. + if (VT == MVT::nxv2i1 || VT == MVT::nxv4i1 || VT == MVT::nxv8i1 || + VT == MVT::nxv16i1 || VT == MVT::nxv2f16 || VT == MVT::nxv4f16 || + VT == MVT::nxv2f32 || VT == MVT::nxv2bf16 || VT == MVT::nxv4bf16) + return Op; + + if (VT != MVT::nxv16i8 && VT != MVT::nxv8i16 && VT != MVT::nxv4i32 && + VT != MVT::nxv2i64 && VT != MVT::nxv8f16 && VT != MVT::nxv4f32 && + VT != MVT::nxv2f64 && VT != MVT::nxv8bf16) + return SDValue(); + + // If the requested element is within the NEON part of an SVE register we + // can use more capable NEON instructions to do the work. + unsigned KnownMinNumElts = VT.getVectorElementCount().getKnownMinValue(); + if (!CI || CI->getZExtValue() >= KnownMinNumElts) + return Op; + + SDLoc DL(Op); + // ValueType for NEON part of the SVE input. + EVT SubVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + KnownMinNumElts); + assert(isTypeLegal(SubVT) && "Unexpected illegal subtype for extract!"); + SDValue Bottom128 = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Op.getOperand(0), + DAG.getConstant(0, DL, MVT::i64)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), + Bottom128, Op.getOperand(1)); + } + if (!CI || CI->getZExtValue() >= VT.getVectorNumElements()) return SDValue(); - // Insertion/extraction are legal for V128 types. if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 || diff --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll --- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll @@ -8,8 +8,7 @@ define i8 @test_lane0_16xi8( %a) { ; CHECK-LABEL: test_lane0_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.b, b0 -; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: umov w0, v0.b[0] ; CHECK-NEXT: ret %b = extractelement %a, i32 0 ret i8 %b @@ -18,8 +17,7 @@ define i16 @test_lane0_8xi16( %a) { ; CHECK-LABEL: test_lane0_8xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.h, h0 -; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: umov w0, v0.h[0] ; CHECK-NEXT: ret %b = extractelement %a, i32 0 ret i16 %b @@ -28,7 +26,6 @@ define i32 @test_lane0_4xi32( %a) { ; CHECK-LABEL: test_lane0_4xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %b = extractelement %a, i32 0 @@ -38,7 +35,6 @@ define i64 @test_lane0_2xi64( %a) { ; CHECK-LABEL: test_lane0_2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, d0 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %b = extractelement %a, i32 0 @@ -72,6 +68,69 @@ ret half %b } +define i8 @test_lane15_16xi8( %a) { +; CHECK-LABEL: test_lane15_16xi8: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w0, v0.b[15] +; CHECK-NEXT: ret + %b = extractelement %a, i32 15 + ret i8 %b +} + +define i16 @test_lane7_8xi16( %a) { +; CHECK-LABEL: test_lane7_8xi16: +; CHECK: // %bb.0: +; CHECK-NEXT: umov w0, v0.h[7] +; CHECK-NEXT: ret + %b = extractelement %a, i32 7 + ret i16 %b +} + +define i32 @test_lane3_4xi32( %a) { +; CHECK-LABEL: test_lane3_4xi32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, v0.s[3] +; CHECK-NEXT: ret + %b = extractelement %a, i32 3 + ret i32 %b +} + +define i64 @test_lane1_2xi64( %a) { +; CHECK-LABEL: test_lane1_2xi64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, v0.d[1] +; CHECK-NEXT: ret + %b = extractelement %a, i32 1 + ret i64 %b +} + +define double @test_lane1_2xf64( %a) { +; CHECK-LABEL: test_lane1_2xf64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: ret + %b = extractelement %a, i32 1 + ret double %b +} + +define float @test_lane3_4xf32( %a) { +; CHECK-LABEL: test_lane3_4xf32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov s0, v0.s[3] +; CHECK-NEXT: ret + %b = extractelement %a, i32 3 + ret float %b +} + +define half @test_lane7_8xf16( %a) { +; CHECK-LABEL: test_lane7_8xf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov h0, v0.h[7] +; CHECK-NEXT: ret + %b = extractelement %a, i32 7 + ret half %b +} + define i8 @test_lanex_16xi8( %a, i32 %x) { ; CHECK-LABEL: test_lanex_16xi8: ; CHECK: // %bb.0: @@ -183,7 +242,6 @@ define i32 @test_lane64_4xi32( %a) { ; CHECK-LABEL: test_lane64_4xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %b = extractelement %a, i32 undef @@ -220,8 +278,7 @@ define i8 @extract_of_insert_diff_lanes_16xi8( %a, i8 %b) { ; CHECK-LABEL: extract_of_insert_diff_lanes_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.b, z0.b[3] -; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: umov w0, v0.b[3] ; CHECK-NEXT: ret %c = insertelement %a, i8 %b, i32 0 %d = extractelement %c, i32 3 diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll --- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll @@ -182,9 +182,8 @@ define @test_insert0_of_extract0_16xi8( %a, %b) { ; CHECK-LABEL: test_insert0_of_extract0_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.b, b1 +; CHECK-NEXT: umov w8, v1.b[0] ; CHECK-NEXT: ptrue p0.b, vl1 -; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov z0.b, p0/m, w8 ; CHECK-NEXT: ret %c = extractelement %b, i32 0 @@ -212,14 +211,13 @@ define @test_insert3_of_extract1_16xi8( %a, %b) { ; CHECK-LABEL: test_insert3_of_extract1_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.b, z1.b[1] -; CHECK-NEXT: mov w8, #3 -; CHECK-NEXT: index z2.b, #0, #1 -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: mov z1.b, w8 +; CHECK-NEXT: mov w9, #3 +; CHECK-NEXT: umov w8, v1.b[1] +; CHECK-NEXT: index z1.b, #0, #1 +; CHECK-NEXT: mov z2.b, w9 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z1.b -; CHECK-NEXT: mov z0.b, p0/m, w9 +; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b +; CHECK-NEXT: mov z0.b, p0/m, w8 ; CHECK-NEXT: ret %c = extractelement %b, i32 1 %d = insertelement %a, i8 %c, i32 3 diff --git a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll --- a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll @@ -127,8 +127,7 @@ define i16 @promote_extract_4i16( %a) { ; CHECK-LABEL: promote_extract_4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.s, z0.s[1] -; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: mov w0, v0.s[1] ; CHECK-NEXT: ret %ext = extractelement %a, i32 1 ret i16 %ext @@ -137,8 +136,7 @@ define i8 @split_extract_32i8( %a) { ; CHECK-LABEL: split_extract_32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.b, z0.b[3] -; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: umov w0, v0.b[3] ; CHECK-NEXT: ret %ext = extractelement %a, i32 3 ret i8 %ext