Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -16181,6 +16181,43 @@ vector index constant type (for most targets this will be an integer pointer type). +'``llvm.experimental.vector.extract.elements``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. + +:: + + declare <2 x float> @llvm.experimental.vector.extract.elements.v4f32(<4 x float> %vec, i32 %index, i32 %stride) + declare <1 x double> @llvm.experimental.vector.extract.elements.v2f64(<2 x double> %vec, i32 %index, i32 %stride) + +Overview: +""""""""" + +The '``llvm.experimental.vector.extract.elements.*``' intrinsic extracts a +strided subvector of elements from the vector ``%vec``. The vector +length of the result type is determined by the vector length of ``%vec`` divided by ``%stride``. +The underlying primitive data type of the result type must match the underlying +primitive data type of the input vector ``%vec``. + +Arguments: +"""""""""" + +The first argument to this intrinsic must be a vector. If the first argument is +a scalable vector, the minimum known number of elements must be a power of two. + +The second argument to this intrinsic is a non-negative constant integer start +index. The start index is currently restricted to 0 or 1. + +The third argument to this intrinsic is a positive constant integer stride, +where every ``%stride``-th element will be extracted from the input vector. +The stride must be greater than the start index, and the vector length of +``%vec`` must be a multiple of the stride. + + + Matrix Intrinsics ----------------- Index: llvm/include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- llvm/include/llvm/CodeGen/ISDOpcodes.h +++ llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -540,6 +540,13 @@ /// vector, but not the other way around.
EXTRACT_SUBVECTOR, + /// VECTOR_EXTRACT_ELEMENTS(VEC, IDX, STRIDE) - Returns a subvector of every + /// STRIDE-th element of the vector VEC, starting from index IDX. The result + /// vector type's primitive data type must match VEC's primitive data type. + /// The result type's vector length must match the input vector type's vector + /// length divided by STRIDE. + VECTOR_EXTRACT_ELEMENTS, + /// VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as /// VEC1/VEC2. A VECTOR_SHUFFLE node also contains an array of constant int /// values that indicate which value (or undef) each result element will Index: llvm/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/include/llvm/IR/Intrinsics.td +++ llvm/include/llvm/IR/Intrinsics.td @@ -1633,8 +1633,12 @@ def int_experimental_vector_extract : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i64_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>; - -//===----------------------------------------------------------------------===// +def int_experimental_vector_extract_elements : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, + llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, + ImmArg<ArgIndex<1>>, + ImmArg<ArgIndex<2>>]>; //===----------------------------------------------------------------------===// // Target-specific intrinsics Index: llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -825,6 +825,7 @@ void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VECTOR_EXTRACT_ELEMENTS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi); void
SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -856,6 +857,7 @@ SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); + SDValue SplitVecOp_VECTOR_EXTRACT_ELEMENTS(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); SDValue SplitVecOp_FCOPYSIGN(SDNode *N); Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -910,6 +910,9 @@ case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; + case ISD::VECTOR_EXTRACT_ELEMENTS: + SplitVecRes_VECTOR_EXTRACT_ELEMENTS(N, Lo, Hi); + break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; @@ -1304,6 +1307,23 @@ PtrInfo.getWithOffset(IncrementSize), SmallestAlign); } +void DAGTypeLegalizer::SplitVecRes_VECTOR_EXTRACT_ELEMENTS(SDNode *N, + SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + SDValue SrcLo, SrcHi; + GetSplitVector(N->getOperand(0), SrcLo, SrcHi); + SDValue Idx = N->getOperand(1); + SDValue Stride = N->getOperand(2); + + // FIXME: This only works if Idx is restricted to 0|1. Will need updating if + // we plan to lift that restriction.
+ EVT VT = N->getValueType(0).getHalfNumVectorElementsVT(*DAG.getContext()); + // The halves use the split result type, which is narrower than SrcLo/SrcHi. + Lo = DAG.getNode(ISD::VECTOR_EXTRACT_ELEMENTS, dl, VT, SrcLo, Idx, Stride); + Hi = DAG.getNode(ISD::VECTOR_EXTRACT_ELEMENTS, dl, VT, SrcHi, Idx, Stride); +} + void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); @@ -2108,6 +2128,9 @@ case ISD::INSERT_SUBVECTOR: Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; + case ISD::VECTOR_EXTRACT_ELEMENTS: + Res = SplitVecOp_VECTOR_EXTRACT_ELEMENTS(N); + break; case ISD::TRUNCATE: Res = SplitVecOp_TruncateHelper(N); break; @@ -2723,6 +2746,31 @@ return DAG.getBuildVector(N->getValueType(0), DL, Elts); } +SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_EXTRACT_ELEMENTS(SDNode *N) { + SDLoc DL(N); + + EVT ResVT = N->getValueType(0); + // Each half of the split operand produces half of the result elements. + EVT HalfVT = ResVT.getHalfNumVectorElementsVT(*DAG.getContext()); + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + SDValue Stride = N->getOperand(2); + + // FIXME: This only works if we restrict Idx to 0|1. This will need updating + // if we lift that restriction. + SDValue Lo, Hi; + GetSplitVector(Vec, Lo, Hi); + + SDValue ResLo = DAG.getNode(ISD::VECTOR_EXTRACT_ELEMENTS, DL, HalfVT, Lo, + Idx, Stride); + SDValue ResHi = DAG.getNode(ISD::VECTOR_EXTRACT_ELEMENTS, DL, HalfVT, Hi, + Idx, Stride); + + // FIXME: CONCAT_VECTORS won't work on Scalable, I don't think. Need a splice + // or similar. + return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, ResLo, ResHi); +} + SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { // The result type is legal, but the input type is illegal.
If splitting // ends up with the result type of each half still being legal, just Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6976,6 +6976,52 @@ setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index)); return; } + case Intrinsic::experimental_vector_extract_elements: { + auto DL = getCurSDLoc(); + + SDValue Src = getValue(I.getOperand(0)); + SDValue Idx = getValue(I.getOperand(1)); + SDValue Stride = getValue(I.getOperand(2)); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + EVT InVT = Src.getValueType(); + + if (VT.isScalableVector()) { + SDValue Res = DAG.getNode(ISD::VECTOR_EXTRACT_ELEMENTS, + DL, VT, Src, Idx, Stride); + setValue(&I, Res); + return; + } + + assert(InVT.isFixedLengthVector() && + "Unexpected scalable vector in vector_extract_elements!"); + + // If a FixedLengthVector, canonicalize to a SHUFFLE_VECTOR with a strided + // mask.
+ ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx); + ConstantSDNode *CStride = dyn_cast<ConstantSDNode>(Stride); + assert(CIdx && CStride && + "Expected an immediate argument in vector_extract_elements!"); + + ElementCount InEC = InVT.getVectorElementCount(); + unsigned InNumElts = InEC.getKnownMinValue(); + ElementCount ResEC = VT.getVectorElementCount(); + unsigned ResNumElts = ResEC.getKnownMinValue(); + + uint64_t idx = CIdx->getZExtValue(); + uint64_t stride = CStride->getZExtValue(); + SmallVector<int, 8> Mask(InNumElts, -1); + for (unsigned i = 0; i < ResNumElts; ++i) + Mask[i] = i*stride + idx; + + SDValue Res = DAG.getVectorShuffle(InVT, DL, Src, DAG.getUNDEF(InVT), Mask); + + if (ResNumElts != InNumElts) + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, + DAG.getVectorIdxConstant(0, DL)); + + setValue(&I, Res); + return; + } } } Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -286,6 +286,7 @@ case ISD::CONCAT_VECTORS: return "concat_vectors"; case ISD::INSERT_SUBVECTOR: return "insert_subvector"; case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; + case ISD::VECTOR_EXTRACT_ELEMENTS: return "extract_elements"; case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; case ISD::SPLAT_VECTOR: return "splat_vector"; Index: llvm/lib/IR/Verifier.cpp =================================================================== --- llvm/lib/IR/Verifier.cpp +++ llvm/lib/IR/Verifier.cpp @@ -5163,6 +5163,32 @@ &Call); break; } + case Intrinsic::experimental_vector_extract_elements: { + VectorType *ResTy = cast<VectorType>(Call.getType()); + VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType()); + ConstantInt *CIdx = dyn_cast<ConstantInt>(Call.getArgOperand(1)); + ConstantInt *CStride = dyn_cast<ConstantInt>(Call.getArgOperand(2)); + ElementCount ResEC = ResTy->getElementCount(); +
ElementCount VecEC = VecTy->getElementCount(); + + Assert(CIdx && CStride && + CIdx->getZExtValue() < CStride->getZExtValue(), + "experimental_vector_extract_elements expects a constant index " + "and stride, where stride > index.", + &Call); + + Assert(VecEC.isKnownMultipleOf(CStride->getZExtValue()) && + ResEC == VecEC.divideCoefficientBy(CStride->getZExtValue()), + "experimental_vector_extract_elements input vector type must be a " + "multiple of the result type.", + &Call); + + Assert(!VecEC.isScalable() || isPowerOf2_64(VecEC.getKnownMinValue()), + "experimental_vector_extract_elements expects the known minimum " + "number of elements to be a power of two for scalable vector types.", + &Call); + break; + } }; } Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -932,6 +932,7 @@ SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const; + SDValue LowerVectorExtractElements(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps, EVT VT, SelectionDAG &DAG, const SDLoc &DL) const; Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1105,6 +1105,7 @@ setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); + setOperationAction(ISD::VECTOR_EXTRACT_ELEMENTS, VT, Custom); } // Illegal unpacked integer vector types.
@@ -1134,6 +1135,7 @@ MVT::nxv4f32, MVT::nxv2f64}) { setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_EXTRACT_ELEMENTS, VT, Custom); setOperationAction(ISD::MGATHER, VT, Custom); setOperationAction(ISD::MSCATTER, VT, Custom); setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); @@ -1518,6 +1520,13 @@ return TLO.CombineTo(Op, New); } +/// getExtFactor - Determine the adjustment factor for the position when +/// generating an "extract from vector registers" instruction. +static unsigned getExtFactor(SDValue &V) { + EVT EltType = V.getValueType().getVectorElementType(); + return EltType.getSizeInBits() / 8; +} + bool AArch64TargetLowering::targetShrinkDemandedConstant( SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const { @@ -4413,6 +4422,8 @@ /*OverrideNEON=*/true); case ISD::CTTZ: return LowerCTTZ(Op, DAG); + case ISD::VECTOR_EXTRACT_ELEMENTS: + return LowerVectorExtractElements(Op, DAG); } } @@ -7358,6 +7369,51 @@ return DAG.getMergeValues(Ops, dl); } +SDValue +AArch64TargetLowering::LowerVectorExtractElements(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + assert(VT.isScalableVector() && + "Unexpected fixed length vector in LowerVectorExtractElements!"); + + SDValue Src = Op.getOperand(0); + uint64_t Idx = Op.getConstantOperandVal(1); + uint64_t Stride = Op.getConstantOperandVal(2); + + SDValue Undef = DAG.getUNDEF(VT); + SDValue Res; + switch (Stride) { + default: + report_fatal_error("Unhandled Stride in LowerVectorExtractElements!"); + case 0: { + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT.getScalarType(), + Src, DAG.getConstant(Idx, DL, MVT::i64)); + Res = DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, Elt); + break; + } + case 1: { + Res = DAG.getNode(AArch64ISD::EXT, DL, VT, Src, Undef, + DAG.getConstant(Idx * getExtFactor(Src), DL, MVT::i64)); + break; + } + case 2: { + assert(Idx
<= 1 && + "LowerVectorExtractElements currently only handles a 0|1 index!"); + // FIXME + break; + } + case 4: { + assert(Idx <= 1 && + "LowerVectorExtractElements currently only handles a 0|1 index!"); + // FIXME + break; + } + } + + return Res; +} + bool AArch64TargetLowering::isOffsetFoldingLegal( const GlobalAddressSDNode *GA) const { // Offsets are folded in the DAG combine rather than here so that we can @@ -7897,13 +7953,6 @@ V64Reg, DAG.getConstant(0, DL, MVT::i32)); } -/// getExtFactor - Determine the adjustment factor for the position when -/// generating an "extract from vector registers" instruction. -static unsigned getExtFactor(SDValue &V) { - EVT EltType = V.getValueType().getVectorElementType(); - return EltType.getSizeInBits() / 8; -} - /// NarrowVector - Given a value in the V128 register class, produce the /// equivalent value in the V64 register class. static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { Index: llvm/test/CodeGen/AArch64/sve-vector-extract-elements.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-vector-extract-elements.ll @@ -0,0 +1,115 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s --check-prefixes=CHECK +; RUN: FileCheck --check-prefix=WARN --allow-empty %s < %t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning + +;; Integer types + +define <8 x i8> @extract_elements_v16i8(<16 x i8> %vec) nounwind { +; CHECK-LABEL: extract_elements_v16i8: +; CHECK-NEXT: ret +%retval = call <8 x i8> @llvm.experimental.vector.extract.elements.v16i8(<16 x i8> %vec, i32 0, i32 2) +ret <8 x i8> %retval +} + +define <4 x i16> @extract_elements_v8i16(<8 x i16> %vec) nounwind { +; CHECK-LABEL: extract_elements_v8i16: +; CHECK-NEXT: ret +%retval = call <4 x i16> @llvm.experimental.vector.extract.elements.v8i16(<8 x i16> %vec, i32 0, i32 2) +ret <4 x i16> %retval +} + +define <2 x i32> @extract_elements_v4i32(<4 x i32> %vec) nounwind { +; CHECK-LABEL: extract_elements_v4i32: +; CHECK-NEXT: ret +%retval = call <2 x i32> @llvm.experimental.vector.extract.elements.v4i32(<4 x i32> %vec, i32 0, i32 2) +ret <2 x i32> %retval +} + +define <1 x i64> @extract_elements_v2i64(<2 x i64> %vec) nounwind { +; CHECK-LABEL: extract_elements_v2i64: +; CHECK-NEXT: ret +%retval = call <1 x i64> @llvm.experimental.vector.extract.elements.v2i64(<2 x i64> %vec, i32 0, i32 2) +ret <1 x i64> %retval +} + +define <vscale x 4 x i32> @extract_elements_nxv8i32(<vscale x 8 x i32> %vec) nounwind { +; CHECK-LABEL: extract_elements_nxv8i32: +; CHECK-NEXT: ret +%retval = call <vscale x 4 x i32> @llvm.experimental.vector.extract.elements.nxv8i32(<vscale x 8 x i32> %vec, i32 0, i32 2) +ret <vscale x 4 x i32> %retval +} + +define <vscale x 2 x i64> @extract_elements_nxv4i64(<vscale x 4 x i64> %vec) nounwind { +; CHECK-LABEL: extract_elements_nxv4i64: +; CHECK-NEXT: ret +%retval = call <vscale x 2 x i64> @llvm.experimental.vector.extract.elements.nxv4i64(<vscale x 4 x i64> %vec, i32 0, i32 2) +ret <vscale x 2 x i64> %retval +} + + +;; Floating Point types + +define <1 x float> @extract_elements_v2f32(<2 x float> %vec) nounwind { +; CHECK-LABEL: extract_elements_v2f32: +; CHECK-NEXT: ret +%retval = call <1 x float> @llvm.experimental.vector.extract.elements.v2f32(<2 x float> %vec, i32 0, i32 2) +ret <1 x float> %retval +} + +define <2 x float> @extract_elements_v4f32(<4 x float> %vec) nounwind { +; CHECK-LABEL: extract_elements_v4f32: +; CHECK-NEXT: ret +%retval = call <2 x float>
@llvm.experimental.vector.extract.elements.v4f32(<4 x float> %vec, i32 0, i32 2) +ret <2 x float> %retval +} + +define <1 x double> @extract_elements_v2f64(<2 x double> %vec) nounwind { +; CHECK-LABEL: extract_elements_v2f64: +; CHECK-NEXT: ret +%retval = call <1 x double> @llvm.experimental.vector.extract.elements.v2f64(<2 x double> %vec, i32 0, i32 2) +ret <1 x double> %retval +} + +define <vscale x 8 x half> @extract_elements_nxv16f16(<vscale x 16 x half> %vec) nounwind { +; CHECK-LABEL: extract_elements_nxv16f16: +; CHECK-NEXT: ret +%retval = call <vscale x 8 x half> @llvm.experimental.vector.extract.elements.nxv16f16(<vscale x 16 x half> %vec, i32 0, i32 2) +ret <vscale x 8 x half> %retval +} + +define <vscale x 4 x float> @extract_elements_nxv8f32(<vscale x 8 x float> %vec) nounwind { +; CHECK-LABEL: extract_elements_nxv8f32: +; CHECK-NEXT: ret +%retval = call <vscale x 4 x float> @llvm.experimental.vector.extract.elements.nxv8f32(<vscale x 8 x float> %vec, i32 0, i32 2) +ret <vscale x 4 x float> %retval +} + +define <vscale x 2 x double> @extract_elements_nxv4f64(<vscale x 4 x double> %vec) nounwind { +; CHECK-LABEL: extract_elements_nxv4f64: +; CHECK-NEXT: ret +%retval = call <vscale x 2 x double> @llvm.experimental.vector.extract.elements.nxv4f64(<vscale x 4 x double> %vec, i32 0, i32 2) +ret <vscale x 2 x double> %retval +} + + +; Integer declarations +declare <8 x i8> @llvm.experimental.vector.extract.elements.v16i8(<16 x i8>, i32, i32) +declare <4 x i16> @llvm.experimental.vector.extract.elements.v8i16(<8 x i16>, i32, i32) +declare <2 x i32> @llvm.experimental.vector.extract.elements.v4i32(<4 x i32>, i32, i32) +declare <1 x i64> @llvm.experimental.vector.extract.elements.v2i64(<2 x i64>, i32, i32) +declare <vscale x 16 x i8> @llvm.experimental.vector.extract.elements.nxv32i8(<vscale x 32 x i8>, i32, i32) +declare <vscale x 8 x i16> @llvm.experimental.vector.extract.elements.nxv16i16(<vscale x 16 x i16>, i32, i32) +declare <vscale x 4 x i32> @llvm.experimental.vector.extract.elements.nxv8i32(<vscale x 8 x i32>, i32, i32) +declare <vscale x 2 x i64> @llvm.experimental.vector.extract.elements.nxv4i64(<vscale x 4 x i64>, i32, i32) + + +; Floating point declarations +declare <4 x half> @llvm.experimental.vector.extract.elements.v8f16(<8 x half>, i32, i32) +declare <1 x float> @llvm.experimental.vector.extract.elements.v2f32(<2 x float>, i32, i32) +declare <2 x float> @llvm.experimental.vector.extract.elements.v4f32(<4 x float>, i32, i32) +declare <1 x double>
@llvm.experimental.vector.extract.elements.v2f64(<2 x double>, i32, i32) +declare <vscale x 8 x half> @llvm.experimental.vector.extract.elements.nxv16f16(<vscale x 16 x half>, i32, i32) +declare <vscale x 4 x float> @llvm.experimental.vector.extract.elements.nxv8f32(<vscale x 8 x float>, i32, i32) +declare <vscale x 2 x double> @llvm.experimental.vector.extract.elements.nxv4f64(<vscale x 4 x double>, i32, i32)