diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -419,6 +419,7 @@

       // Mask VTs are custom-expanded into a series of standard nodes
       setOperationAction(ISD::TRUNCATE, VT, Custom);
+      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
     }

     for (MVT VT : IntVecVTs) {
@@ -537,12 +538,15 @@

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);

+        setOperationAction(ISD::SETCC, VT, Custom);
+
+        setOperationAction(ISD::TRUNCATE, VT, Custom);
+
        // Operations below are different for between masks and other vectors.
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction(ISD::AND, VT, Custom);
          setOperationAction(ISD::OR, VT, Custom);
          setOperationAction(ISD::XOR, VT, Custom);
-          setOperationAction(ISD::SETCC, VT, Custom);
          continue;
        }
@@ -578,7 +582,6 @@
        setOperationAction(ISD::VSELECT, VT, Custom);

-        setOperationAction(ISD::TRUNCATE, VT, Custom);
        setOperationAction(ISD::ANY_EXTEND, VT, Custom);
        setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
        setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
@@ -2119,28 +2122,35 @@
   assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
          "Unexpected type for vector mask lowering");
   SDValue Src = Op.getOperand(0);
-  EVT VecVT = Src.getValueType();
-
-  // Be careful not to introduce illegal scalar types at this stage, and be
-  // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
-  // illegal and must be expanded. Since we know that the constants are
-  // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
-  bool IsRV32E64 =
-      !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
+  MVT VecVT = Src.getSimpleValueType();
+
+  // If this is a fixed vector, we need to convert it to a scalable vector.
+  MVT ContainerVT = VecVT;
+  if (VecVT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
+    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+  }
+
   SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
   SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
-  if (!IsRV32E64) {
-    SplatOne = DAG.getSplatVector(VecVT, DL, SplatOne);
-    SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
-  } else {
-    SplatOne = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatOne);
-    SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
+  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
+  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);
+
+  if (VecVT.isScalableVector()) {
+    SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
+    return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
   }
-  SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
+  SDValue Mask, VL;
+  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

-  return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
+  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
+  SDValue Trunc =
+      DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
+  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
+                      DAG.getCondCode(ISD::SETNE), Mask, VL);
+  return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
 }

 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
@@ -2511,6 +2521,43 @@
   unsigned OrigIdx = Op.getConstantOperandVal(1);
   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();

+  // We don't have the ability to slide mask vectors down indexed by their i1
+  // elements; the smallest we can do is i8. Often we are able to bitcast to
+  // equivalent i8 vectors. Note that when extracting a fixed-length vector
+  // from a scalable one, we might not necessarily have enough scalable
+  // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
+  if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
+    if (VecVT.getVectorMinNumElements() >= 8 &&
+        SubVecVT.getVectorMinNumElements() >= 8) {
+      assert(OrigIdx % 8 == 0 && "Invalid index");
+      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
+             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
+             "Unexpected mask vector lowering");
+      OrigIdx /= 8;
+      SubVecVT =
+          MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
+                           SubVecVT.isScalableVector());
+      VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
+                               VecVT.isScalableVector());
+      Vec = DAG.getBitcast(VecVT, Vec);
+    } else {
+      // We can't slide this mask vector down, indexed by its i1 elements.
+      // This poses a problem when we wish to extract a scalable vector which
+      // can't be re-expressed as a larger type. Just choose the slow path and
+      // extend to a larger type, then truncate back down.
+      // TODO: We could probably improve this when extracting certain fixed
+      // from fixed, where we can extract as i8 and shift the correct element
+      // right to reach the desired subvector?
+      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
+      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
+      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
+      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
+                        Op.getOperand(1));
+      SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
+      return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
+    }
+  }
+
   // If the subvector vector is a fixed-length type, we cannot use subregister
   // manipulation to simplify the codegen; we don't know which register of a
   // LMUL group contains the specific subvector as we only know the minimum
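Roughly speaking, the two strategies the new lowerEXTRACT_SUBVECTOR code picks between correspond to the IR-level rewrites sketched below. This is an illustrative sketch only: the real transformation is performed on SelectionDAG nodes, and the function names here are invented for the example (the intrinsic calls mirror the tests that follow).

; Bitcast fast path: when the index and both minimum element counts are
; multiples of 8, an i1 subvector extract can be re-expressed as an i8 extract
; with the index divided by 8 (here: nxv8i1 at i1 index 8 becomes nxv1i8 at
; i8 index 1).
define <vscale x 8 x i1> @sketch_bitcast_path(<vscale x 64 x i1> %mask) {
  %bytes = bitcast <vscale x 64 x i1> %mask to <vscale x 8 x i8>
  %sub = call <vscale x 1 x i8> @llvm.experimental.vector.extract.nxv1i8.nxv8i8(<vscale x 8 x i8> %bytes, i64 1)
  %res = bitcast <vscale x 1 x i8> %sub to <vscale x 8 x i1>
  ret <vscale x 8 x i1> %res
}

; Slow path: an extract that cannot be re-expressed in i8 lanes is widened to
; i8, extracted at the original index, and narrowed back with a compare.
define <vscale x 2 x i1> @sketch_slow_path(<vscale x 64 x i1> %mask) {
  %ext = zext <vscale x 64 x i1> %mask to <vscale x 64 x i8>
  %sub = call <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv64i8(<vscale x 64 x i8> %ext, i64 2)
  %res = icmp ne <vscale x 2 x i8> %sub, zeroinitializer
  ret <vscale x 2 x i1> %res
}

declare <vscale x 1 x i8> @llvm.experimental.vector.extract.nxv1i8.nxv8i8(<vscale x 8 x i8>, i64)
declare <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv64i8(<vscale x 64 x i8>, i64)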
diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -357,6 +357,51 @@
   ret <vscale x 2 x half> %c
 }

+define <vscale x 8 x i1> @extract_nxv64i1_nxv8i1_0(<vscale x 64 x i1> %mask) {
+; CHECK-LABEL: extract_nxv64i1_nxv8i1_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  %c = call <vscale x 8 x i1> @llvm.experimental.vector.extract.nxv8i1(<vscale x 64 x i1> %mask, i64 0)
+  ret <vscale x 8 x i1> %c
+}
+
+define <vscale x 8 x i1> @extract_nxv64i1_nxv8i1_8(<vscale x 64 x i1> %mask) {
+; CHECK-LABEL: extract_nxv64i1_nxv8i1_8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vslidedown.vx v0, v0, a0
+; CHECK-NEXT:    ret
+  %c = call <vscale x 8 x i1> @llvm.experimental.vector.extract.nxv8i1(<vscale x 64 x i1> %mask, i64 8)
+  ret <vscale x 8 x i1> %c
+}
+
+define <vscale x 2 x i1> @extract_nxv64i1_nxv2i1_0(<vscale x 64 x i1> %mask) {
+; CHECK-LABEL: extract_nxv64i1_nxv2i1_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  %c = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1(<vscale x 64 x i1> %mask, i64 0)
+  ret <vscale x 2 x i1> %c
+}
+
+define <vscale x 2 x i1> @extract_nxv64i1_nxv2i1_2(<vscale x 64 x i1> %mask) {
+; CHECK-LABEL: extract_nxv64i1_nxv2i1_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m8,ta,mu
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vslidedown.vx v25, v8, a0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    ret
+  %c = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1(<vscale x 64 x i1> %mask, i64 2)
+  ret <vscale x 2 x i1> %c
+}
+
 declare <vscale x 1 x i8> @llvm.experimental.vector.extract.nxv1i8.nxv8i8(<vscale x 8 x i8> %vec, i64 %idx)

 declare <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 %idx)
@@ -372,3 +417,6 @@
 declare <vscale x 8 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, i64 %idx)

 declare <vscale x 2 x half> @llvm.experimental.vector.extract.nxv2f16.nxv16f16(<vscale x 16 x half> %vec, i64 %idx)
+
+declare <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1(<vscale x 64 x i1> %vec, i64 %idx)
+declare <vscale x 8 x i1> @llvm.experimental.vector.extract.nxv8i1(<vscale x 64 x i1> %vec, i64 %idx)
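The slide amounts in the scalable-vector checks above fall out of the i8 reinterpretation. A worked sketch of the arithmetic for the two non-zero-index tests, assuming VLEN-bit vector registers (so vlenb = VLEN/8 and vscale = VLEN/64):

; extract_nxv64i1_nxv8i1_8 (bitcast path):
;   nxv64i1 is viewed as nxv8i8, so i1 index 8 becomes i8 index 1
;   for a scalable source the index is scaled by vscale = VLEN/64
;   slide amount = 1 * vscale = vlenb/8 bytes
;     ->  csrr a0, vlenb ; srli a0, a0, 3
;
; extract_nxv64i1_nxv2i1_2 (widening path, index 2 is not a multiple of 8):
;   the mask is widened to bytes with vmv.v.i/vmerge.vim at LMUL=8, then
;   nxv2i8 is extracted at index 2: slide amount = 2 * vscale = vlenb/4 bytes
;     ->  csrr a0, vlenb ; srli a0, a0, 2
;   vmsne.vi finally compares the extracted bytes against 0 to rebuild the mask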
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
@@ -152,6 +152,321 @@
   ret void
 }

+define void @extract_v8i1_v64i1_0(<64 x i1>* %x, <8 x i1>* %y) {
+; LMULMAX2-LABEL: extract_v8i1_v64i1_0:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 32
+; LMULMAX2-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vle1.v v25, (a0)
+; LMULMAX2-NEXT:    vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX2-NEXT:    vse1.v v25, (a1)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: extract_v8i1_v64i1_0:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vle1.v v25, (a0)
+; LMULMAX1-NEXT:    vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vse1.v v25, (a1)
+; LMULMAX1-NEXT:    ret
+  %a = load <64 x i1>, <64 x i1>* %x
+  %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 0)
+  store <8 x i1> %c, <8 x i1>* %y
+  ret void
+}
+
+define void @extract_v8i1_v64i1_8(<64 x i1>* %x, <8 x i1>* %y) {
+; LMULMAX2-LABEL: extract_v8i1_v64i1_8:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 32
+; LMULMAX2-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vle1.v v25, (a0)
+; LMULMAX2-NEXT:    vsetivli a0, 1, e8,m1,ta,mu
+; LMULMAX2-NEXT:    vslidedown.vi v25, v25, 1
+; LMULMAX2-NEXT:    vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX2-NEXT:    vse1.v v25, (a1)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: extract_v8i1_v64i1_8:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vle1.v v25, (a0)
+; LMULMAX1-NEXT:    vsetivli a0, 1, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vslidedown.vi v25, v25, 1
+; LMULMAX1-NEXT:    vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vse1.v v25, (a1)
+; LMULMAX1-NEXT:    ret
+  %a = load <64 x i1>, <64 x i1>* %x
+  %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 8)
+  store <8 x i1> %c, <8 x i1>* %y
+  ret void
+}
+
+define void @extract_v8i1_v64i1_48(<64 x i1>* %x, <8 x i1>* %y) {
+; LMULMAX2-LABEL: extract_v8i1_v64i1_48:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a0, a0, 4
+; LMULMAX2-NEXT:    addi a2, zero, 32
+; LMULMAX2-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vle1.v v25, (a0)
+; LMULMAX2-NEXT:    vsetivli a0, 1, e8,m1,ta,mu
+; LMULMAX2-NEXT:    vslidedown.vi v25, v25, 2
+; LMULMAX2-NEXT:    vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX2-NEXT:    vse1.v v25, (a1)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: extract_v8i1_v64i1_48:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a0, a0, 6
+; LMULMAX1-NEXT:    vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vle1.v v25, (a0)
+; LMULMAX1-NEXT:    vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vse1.v v25, (a1)
+; LMULMAX1-NEXT:    ret
+  %a = load <64 x i1>, <64 x i1>* %x
+  %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 48)
+  store <8 x i1> %c, <8 x i1>* %y
+  ret void
+}
+
+define void @extract_v8i1_nxv2i1_0(<vscale x 2 x i1> %x, <8 x i1>* %y) {
+; CHECK-LABEL: extract_v8i1_nxv2i1_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vse1.v v0, (a0)
+; CHECK-NEXT:    ret
+  %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %x, i64 0)
+  store <8 x i1> %c, <8 x i1>* %y
+  ret void
+}
+
+define void @extract_v8i1_nxv2i1_2(<vscale x 2 x i1> %x, <8 x i1>* %y) {
+; CHECK-LABEL: extract_v8i1_nxv2i1_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT:    vsetivli a1, 8, e8,mf4,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v25, 2
+; CHECK-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vmsne.vi v26, v25, 0
+; CHECK-NEXT:    vse1.v v26, (a0)
+; CHECK-NEXT:    ret
+  %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %x, i64 2)
+  store <8 x i1> %c, <8 x i1>* %y
+  ret void
+}
+
+define void @extract_v8i1_nxv64i1_0(<vscale x 64 x i1> %x, <8 x i1>* %y) {
+; CHECK-LABEL: extract_v8i1_nxv64i1_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vse1.v v0, (a0)
+; CHECK-NEXT:    ret
+  %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %x, i64 0)
+  store <8 x i1> %c, <8 x i1>* %y
+  ret void
+}
+
+define void @extract_v8i1_nxv64i1_8(<vscale x 64 x i1> %x, <8 x i1>* %y) {
+; CHECK-LABEL: extract_v8i1_nxv64i1_8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 1, e8,m1,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v0, 1
+; CHECK-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %x, i64 8)
+  store <8 x i1> %c, <8 x i1>* %y
+  ret void
+}
+
+define void @extract_v8i1_nxv64i1_48(<vscale x 64 x i1> %x, <8 x i1>* %y) {
+; CHECK-LABEL: extract_v8i1_nxv64i1_48:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 1, e8,m1,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v0, 6
+; CHECK-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %x, i64 48)
+  store <8 x i1> %c, <8 x i1>* %y
+  ret void
+}
+
+
+define void @extract_v2i1_v64i1_0(<64 x i1>* %x, <2 x i1>* %y) {
+; LMULMAX2-LABEL: extract_v2i1_v64i1_0:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 32
+; LMULMAX2-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vle1.v v25, (a0)
+; LMULMAX2-NEXT:    vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX2-NEXT:    vse1.v v25, (a1)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: extract_v2i1_v64i1_0:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vle1.v v25, (a0)
+; LMULMAX1-NEXT:    vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vse1.v v25, (a1)
+; LMULMAX1-NEXT:    ret
+  %a = load <64 x i1>, <64 x i1>* %x
+  %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 0)
+  store <2 x i1> %c, <2 x i1>* %y
+  ret void
+}
+
+define void @extract_v2i1_v64i1_2(<64 x i1>* %x, <2 x i1>* %y) {
+; LMULMAX2-LABEL: extract_v2i1_v64i1_2:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 32
+; LMULMAX2-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vle1.v v0, (a0)
+; LMULMAX2-NEXT:    vmv.v.i v26, 0
+; LMULMAX2-NEXT:    vmerge.vim v26, v26, 1, v0
+; LMULMAX2-NEXT:    vsetivli a0, 2, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vslidedown.vi v26, v26, 2
+; LMULMAX2-NEXT:    vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX2-NEXT:    vmsne.vi v25, v26, 0
+; LMULMAX2-NEXT:    vse1.v v25, (a1)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: extract_v2i1_v64i1_2:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vle1.v v0, (a0)
+; LMULMAX1-NEXT:    vmv.v.i v25, 0
+; LMULMAX1-NEXT:    vmerge.vim v25, v25, 1, v0
+; LMULMAX1-NEXT:    vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT:    vmsne.vi v26, v25, 0
+; LMULMAX1-NEXT:    vse1.v v26, (a1)
+; LMULMAX1-NEXT:    ret
+  %a = load <64 x i1>, <64 x i1>* %x
+  %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 2)
+  store <2 x i1> %c, <2 x i1>* %y
+  ret void
+}
+
+define void @extract_v2i1_v64i1_42(<64 x i1>* %x, <2 x i1>* %y) {
+; LMULMAX2-LABEL: extract_v2i1_v64i1_42:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a0, a0, 4
+; LMULMAX2-NEXT:    addi a2, zero, 32
+; LMULMAX2-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vle1.v v0, (a0)
+; LMULMAX2-NEXT:    vmv.v.i v26, 0
+; LMULMAX2-NEXT:    vmerge.vim v26, v26, 1, v0
+; LMULMAX2-NEXT:    vsetivli a0, 2, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vslidedown.vi v26, v26, 10
+; LMULMAX2-NEXT:    vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX2-NEXT:    vmsne.vi v25, v26, 0
+; LMULMAX2-NEXT:    vse1.v v25, (a1)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: extract_v2i1_v64i1_42:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a0, a0, 4
+; LMULMAX1-NEXT:    vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vle1.v v0, (a0)
+; LMULMAX1-NEXT:    vmv.v.i v25, 0
+; LMULMAX1-NEXT:    vmerge.vim v25, v25, 1, v0
+; LMULMAX1-NEXT:    vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vslidedown.vi v25, v25, 10
+; LMULMAX1-NEXT:    vmsne.vi v26, v25, 0
+; LMULMAX1-NEXT:    vse1.v v26, (a1)
+; LMULMAX1-NEXT:    ret
+  %a = load <64 x i1>, <64 x i1>* %x
+  %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 42)
+  store <2 x i1> %c, <2 x i1>* %y
+  ret void
+}
+
+define void @extract_v2i1_nxv2i1_0(<vscale x 2 x i1> %x, <2 x i1>* %y) {
+; CHECK-LABEL: extract_v2i1_nxv2i1_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT:    vse1.v v0, (a0)
+; CHECK-NEXT:    ret
+  %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %x, i64 0)
+  store <2 x i1> %c, <2 x i1>* %y
+  ret void
+}
+
+define void @extract_v2i1_nxv2i1_2(<vscale x 2 x i1> %x, <2 x i1>* %y) {
+; CHECK-LABEL: extract_v2i1_nxv2i1_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT:    vsetivli a1, 2, e8,mf4,ta,mu
+; CHECK-NEXT:    vslidedown.vi v25, v25, 2
+; CHECK-NEXT:    vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT:    vmsne.vi v26, v25, 0
+; CHECK-NEXT:    vse1.v v26, (a0)
+; CHECK-NEXT:    ret
+  %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %x, i64 2)
+  store <2 x i1> %c, <2 x i1>* %y
+  ret void
+}
+
+define void @extract_v2i1_nxv64i1_0(<vscale x 64 x i1> %x, <2 x i1>* %y) {
+; CHECK-LABEL: extract_v2i1_nxv64i1_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT:    vse1.v v0, (a0)
+; CHECK-NEXT:    ret
+  %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 0)
+  store <2 x i1> %c, <2 x i1>* %y
+  ret void
+}
+
+define void @extract_v2i1_nxv64i1_2(<vscale x 64 x i1> %x, <2 x i1>* %y) {
+; CHECK-LABEL: extract_v2i1_nxv64i1_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8,m8,ta,mu
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    vsetivli a1, 2, e8,m8,ta,mu
+; CHECK-NEXT:    vslidedown.vi v8, v8, 2
+; CHECK-NEXT:    vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT:    vmsne.vi v25, v8, 0
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 2)
+  store <2 x i1> %c, <2 x i1>* %y
+  ret void
+}
+
+define void @extract_v2i1_nxv64i1_42(<vscale x 64 x i1> %x, <2 x i1>* %y) {
+; CHECK-LABEL: extract_v2i1_nxv64i1_42:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8,m8,ta,mu
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    addi a1, zero, 42
+; CHECK-NEXT:    vsetivli a2, 2, e8,m8,ta,mu
+; CHECK-NEXT:    vslidedown.vx v8, v8, a1
+; CHECK-NEXT:    vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT:    vmsne.vi v25, v8, 0
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 42)
+  store <2 x i1> %c, <2 x i1>* %y
+  ret void
+}
+
+declare <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %vec, i64 %idx)
+declare <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %vec, i64 %idx)
+
+declare <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %vec, i64 %idx)
+declare <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %vec, i64 %idx)
+
+declare <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %vec, i64 %idx)
+declare <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %vec, i64 %idx)
+
 declare <2 x i8> @llvm.experimental.vector.extract.v2i8.v8i8(<8 x i8> %vec, i64 %idx)

 declare <2 x i32> @llvm.experimental.vector.extract.v2i32.v8i32(<8 x i32> %vec, i64 %idx)
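For reference when reading the check prefixes: the fixed-vectors file is driven by two llc RUN lines that differ only in the maximum LMUL allowed for fixed-length lowering, which is what the LMULMAX2/LMULMAX1 prefixes select. The RUN lines themselves are outside the hunks shown; the ones below are an assumed approximation in the style of other rvv fixed-vectors tests and may not match the file verbatim.

; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs \
; RUN:   -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs \
; RUN:   -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,LMULMAX1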