diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1185,6 +1185,7 @@
       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
       setOperationAction(ISD::SELECT_CC, VT, Expand);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 
       // There are no legal MVT::nxv16f## based types.
       if (VT != MVT::nxv16i1) {
         setOperationAction(ISD::SINT_TO_FP, VT, Custom);
@@ -10073,8 +10074,22 @@
 AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                SelectionDAG &DAG) const {
   assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
   EVT VT = Op.getOperand(0).getValueType();
+
+  if (VT.getScalarType() == MVT::i1) {
+    // We can't directly extract from an SVE predicate; extend it first.
+    // (This isn't the only possible lowering, but it's straightforward.)
+    EVT VectorVT = getPromotedVTForPredicate(VT);
+    SDLoc DL(Op);
+    SDValue Extend =
+        DAG.getNode(ISD::ANY_EXTEND, DL, VectorVT, Op.getOperand(0));
+    MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
+    SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractTy,
+                                  Extend, Op.getOperand(1));
+    return DAG.getAnyExtOrTrunc(Extract, DL, Op.getValueType());
+  }
+
   if (useSVEForFixedLengthVectorVT(VT))
     return LowerFixedLengthExtractVectorElt(Op, DAG);
 
@@ -10083,7 +10098,6 @@
   if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
     return SDValue();
 
-  // Insertion/extraction are legal for V128 types.
   if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
       VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
       VT == MVT::v8f16 || VT == MVT::v8bf16)
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
--- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
@@ -478,4 +478,53 @@
   ret i64 %c
 }
 
+define i1 @test_lane0_16xi1(<vscale x 16 x i1> %a) #0 {
+; CHECK-LABEL: test_lane0_16xi1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.b, p0/z, #1 // =0x1
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 16 x i1> %a, i32 0
+  ret i1 %b
+}
+
+define i1 @test_lane9_8xi1(<vscale x 8 x i1> %a) #0 {
+; CHECK-LABEL: test_lane9_8xi1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, p0/z, #1 // =0x1
+; CHECK-NEXT:    mov z0.h, z0.h[9]
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 8 x i1> %a, i32 9
+  ret i1 %b
+}
+
+define i1 @test_lanex_4xi1(<vscale x 4 x i1> %a, i32 %x) #0 {
+; CHECK-LABEL: test_lanex_4xi1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxtw x8, w0
+; CHECK-NEXT:    whilels p1.s, xzr, x8
+; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    lastb w8, p1, z0.s
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 4 x i1> %a, i32 %x
+  ret i1 %b
+}
+
+define i1 @test_lane4_2xi1(<vscale x 2 x i1> %a) #0 {
+; CHECK-LABEL: test_lane4_2xi1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, p0/z, #1 // =0x1
+; CHECK-NEXT:    mov z0.d, z0.d[4]
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 2 x i1> %a, i32 4
+  ret i1 %b
+}
+
 attributes #0 = { "target-features"="+sve" }