diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5157,6 +5157,16 @@
   MVT XLenVT = Subtarget.getXLenVT();
 
   if (VecVT.getVectorElementType() == MVT::i1) {
+    // Use vcpop.m to extract the first bit.
+    if (isNullConstant(Idx)) {
+      MVT ContainerVT = VecVT;
+      if (VecVT.isFixedLengthVector()) {
+        ContainerVT = getContainerForFixedLengthVector(VecVT);
+        Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+      }
+      auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
+      return DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
+    }
     if (VecVT.isFixedLengthVector()) {
       unsigned NumElts = VecVT.getVectorNumElements();
       if (NumElts >= 8) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
@@ -219,10 +219,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v8
 ; CHECK-NEXT:    ret
   %a = load <vscale x 1 x i8>, <vscale x 1 x i8>* %x
   %b = icmp eq <vscale x 1 x i8> %a, zeroinitializer
@@ -235,10 +234,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v8
 ; CHECK-NEXT:    ret
   %a = load <vscale x 2 x i8>, <vscale x 2 x i8>* %x
   %b = icmp eq <vscale x 2 x i8> %a, zeroinitializer
@@ -251,10 +249,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v8
 ; CHECK-NEXT:    ret
   %a = load <vscale x 4 x i8>, <vscale x 4 x i8>* %x
   %b = icmp eq <vscale x 4 x i8> %a, zeroinitializer
@@ -267,10 +264,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl1r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v8
 ; CHECK-NEXT:    ret
   %a = load <vscale x 8 x i8>, <vscale x 8 x i8>* %x
   %b = icmp eq <vscale x 8 x i8> %a, zeroinitializer
@@ -283,10 +279,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v10, v8, 0
+; CHECK-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v10
 ; CHECK-NEXT:    ret
   %a = load <vscale x 16 x i8>, <vscale x 16 x i8>* %x
   %b = icmp eq <vscale x 16 x i8> %a, zeroinitializer
@@ -299,10 +294,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v12, v8, 0
+; CHECK-NEXT:    vsetivli zero, 1, e8, m4, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v12
 ; CHECK-NEXT:    ret
   %a = load <vscale x 32 x i8>, <vscale x 32 x i8>* %x
   %b = icmp eq <vscale x 32 x i8> %a, zeroinitializer
@@ -315,10 +309,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl8r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v16, v8, 0
+; CHECK-NEXT:    vsetivli zero, 1, e8, m8, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v16
 ; CHECK-NEXT:    ret
   %a = load <vscale x 64 x i8>, <vscale x 64 x i8>* %x
   %b = icmp eq <vscale x 64 x i8> %a, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
@@ -453,10 +453,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vcpop.m a0, v8
 ; CHECK-NEXT:    ret
   %a = load <1 x i8>, <1 x i8>* %x
   %b = icmp eq <1 x i8> %a, zeroinitializer
@@ -469,10 +467,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v8
 ; CHECK-NEXT:    ret
   %a = load <2 x i8>, <2 x i8>* %x
   %b = icmp eq <2 x i8> %a, zeroinitializer
@@ -485,10 +482,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vmseq.vi v8, v8, 0
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v8
 ; CHECK-NEXT:    ret
   %a = load <4 x i8>, <4 x i8>* %x
   %b = icmp eq <4 x i8> %a, zeroinitializer
@@ -502,8 +498,8 @@
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vmseq.vi v8, v8, 0
-; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v8
 ; CHECK-NEXT:    ret
   %a = load <8 x i8>, <8 x i8>* %x
   %b = icmp eq <8 x i8> %a, zeroinitializer
@@ -517,9 +513,8 @@
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vmseq.vi v8, v8, 0
-; CHECK-NEXT:    vsetivli zero, 0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v8
 ; CHECK-NEXT:    ret
   %a = load <16 x i8>, <16 x i8>* %x
   %b = icmp eq <16 x i8> %a, zeroinitializer
@@ -534,9 +529,8 @@
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vmseq.vi v10, v8, 0
-; CHECK-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; CHECK-NEXT:    vmv.x.s a0, v10
-; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v10
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, <32 x i8>* %x
   %b = icmp eq <32 x i8> %a, zeroinitializer
@@ -545,49 +539,15 @@
 }
 
 define i1 @extractelt_v64i1_idx0(<64 x i8>* %x) nounwind {
-; RV32-LABEL: extractelt_v64i1_idx0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 64
-; RV32-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; RV32-NEXT:    vle8.v v8, (a0)
-; RV32-NEXT:    vmseq.vi v12, v8, 0
-; RV32-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; RV32-NEXT:    vmv.x.s a0, v12
-; RV32-NEXT:    andi a0, a0, 1
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: extractelt_v64i1_idx0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 64
-; RV64-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; RV64-NEXT:    vle8.v v8, (a0)
-; RV64-NEXT:    vmseq.vi v12, v8, 0
-; RV64-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; RV64-NEXT:    vmv.x.s a0, v12
-; RV64-NEXT:    andi a0, a0, 1
-; RV64-NEXT:    ret
-;
-; RV32ZBS-LABEL: extractelt_v64i1_idx0:
-; RV32ZBS:       # %bb.0:
-; RV32ZBS-NEXT:    li a1, 64
-; RV32ZBS-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; RV32ZBS-NEXT:    vle8.v v8, (a0)
-; RV32ZBS-NEXT:    vmseq.vi v12, v8, 0
-; RV32ZBS-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; RV32ZBS-NEXT:    vmv.x.s a0, v12
-; RV32ZBS-NEXT:    andi a0, a0, 1
-; RV32ZBS-NEXT:    ret
-;
-; RV64ZBS-LABEL: extractelt_v64i1_idx0:
-; RV64ZBS:       # %bb.0:
-; RV64ZBS-NEXT:    li a1, 64
-; RV64ZBS-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; RV64ZBS-NEXT:    vle8.v v8, (a0)
-; RV64ZBS-NEXT:    vmseq.vi v12, v8, 0
-; RV64ZBS-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; RV64ZBS-NEXT:    vmv.x.s a0, v12
-; RV64ZBS-NEXT:    andi a0, a0, 1
-; RV64ZBS-NEXT:    ret
+; CHECK-LABEL: extractelt_v64i1_idx0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 64
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vmseq.vi v12, v8, 0
+; CHECK-NEXT:    vsetivli zero, 1, e8, m4, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v12
+; CHECK-NEXT:    ret
   %a = load <64 x i8>, <64 x i8>* %x
   %b = icmp eq <64 x i8> %a, zeroinitializer
   %c = extractelement <64 x i1> %b, i64 0
@@ -595,49 +555,15 @@
 }
 
 define i1 @extractelt_v128i1_idx0(<128 x i8>* %x) nounwind {
-; RV32-LABEL: extractelt_v128i1_idx0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 128
-; RV32-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; RV32-NEXT:    vle8.v v8, (a0)
-; RV32-NEXT:    vmseq.vi v16, v8, 0
-; RV32-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
-; RV32-NEXT:    vmv.x.s a0, v16
-; RV32-NEXT:    andi a0, a0, 1
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: extractelt_v128i1_idx0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 128
-; RV64-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; RV64-NEXT:    vle8.v v8, (a0)
-; RV64-NEXT:    vmseq.vi v16, v8, 0
-; RV64-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; RV64-NEXT:    vmv.x.s a0, v16
-; RV64-NEXT:    andi a0, a0, 1
-; RV64-NEXT:    ret
-;
-; RV32ZBS-LABEL: extractelt_v128i1_idx0:
-; RV32ZBS:       # %bb.0:
-; RV32ZBS-NEXT:    li a1, 128
-; RV32ZBS-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; RV32ZBS-NEXT:    vle8.v v8, (a0)
-; RV32ZBS-NEXT:    vmseq.vi v16, v8, 0
-; RV32ZBS-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
-; RV32ZBS-NEXT:    vmv.x.s a0, v16
-; RV32ZBS-NEXT:    andi a0, a0, 1
-; RV32ZBS-NEXT:    ret
-;
-; RV64ZBS-LABEL: extractelt_v128i1_idx0:
-; RV64ZBS:       # %bb.0:
-; RV64ZBS-NEXT:    li a1, 128
-; RV64ZBS-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; RV64ZBS-NEXT:    vle8.v v8, (a0)
-; RV64ZBS-NEXT:    vmseq.vi v16, v8, 0
-; RV64ZBS-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; RV64ZBS-NEXT:    vmv.x.s a0, v16
-; RV64ZBS-NEXT:    andi a0, a0, 1
-; RV64ZBS-NEXT:    ret
+; CHECK-LABEL: extractelt_v128i1_idx0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vmseq.vi v16, v8, 0
+; CHECK-NEXT:    vsetivli zero, 1, e8, m8, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    ret
   %a = load <128 x i8>, <128 x i8>* %x
   %b = icmp eq <128 x i8> %a, zeroinitializer
   %c = extractelement <128 x i1> %b, i64 0
@@ -645,49 +571,15 @@
 }
 
 define i1 @extractelt_v256i1_idx0(<256 x i8>* %x) nounwind {
-; RV32-LABEL: extractelt_v256i1_idx0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 128
-; RV32-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; RV32-NEXT:    vle8.v v8, (a0)
-; RV32-NEXT:    vmseq.vi v16, v8, 0
-; RV32-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
-; RV32-NEXT:    vmv.x.s a0, v16
-; RV32-NEXT:    andi a0, a0, 1
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: extractelt_v256i1_idx0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 128
-; RV64-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; RV64-NEXT:    vle8.v v8, (a0)
-; RV64-NEXT:    vmseq.vi v16, v8, 0
-; RV64-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; RV64-NEXT:    vmv.x.s a0, v16
-; RV64-NEXT:    andi a0, a0, 1
-; RV64-NEXT:    ret
-;
-; RV32ZBS-LABEL: extractelt_v256i1_idx0:
-; RV32ZBS:       # %bb.0:
-; RV32ZBS-NEXT:    li a1, 128
-; RV32ZBS-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; RV32ZBS-NEXT:    vle8.v v8, (a0)
-; RV32ZBS-NEXT:    vmseq.vi v16, v8, 0
-; RV32ZBS-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
-; RV32ZBS-NEXT:    vmv.x.s a0, v16
-; RV32ZBS-NEXT:    andi a0, a0, 1
-; RV32ZBS-NEXT:    ret
-;
-; RV64ZBS-LABEL: extractelt_v256i1_idx0:
-; RV64ZBS:       # %bb.0:
-; RV64ZBS-NEXT:    li a1, 128
-; RV64ZBS-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; RV64ZBS-NEXT:    vle8.v v8, (a0)
-; RV64ZBS-NEXT:    vmseq.vi v16, v8, 0
-; RV64ZBS-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; RV64ZBS-NEXT:    vmv.x.s a0, v16
-; RV64ZBS-NEXT:    andi a0, a0, 1
-; RV64ZBS-NEXT:    ret
+; CHECK-LABEL: extractelt_v256i1_idx0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vmseq.vi v16, v8, 0
+; CHECK-NEXT:    vsetivli zero, 1, e8, m8, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v16
+; CHECK-NEXT:    ret
   %a = load <256 x i8>, <256 x i8>* %x
   %b = icmp eq <256 x i8> %a, zeroinitializer
   %c = extractelement <256 x i1> %b, i64 0
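Note: a minimal standalone reproducer for the new lowering path, for illustration only (the function name and llc invocation below are not part of the patch). Since vcpop.m with vl=1 counts set bits among just the first mask element, its result is exactly bit 0 of the mask, so extracting element 0 of an i1 vector should now compile to vmseq.vi followed by a single VL=1 vcpop.m instead of the old vmv.v.i/vmerge.vim/vmv.x.s expansion.

; reproducer.ll -- try: llc -mtriple=riscv64 -mattr=+v reproducer.ll -o -
define i1 @extract_first_mask_bit(<vscale x 8 x i8> %v) {
  ; Compare produces a mask vector; extracting bit 0 exercises the
  ; isNullConstant(Idx) fast path added in RISCVISelLowering.cpp above.
  %m = icmp eq <vscale x 8 x i8> %v, zeroinitializer
  %bit = extractelement <vscale x 8 x i1> %m, i64 0
  ret i1 %bit
}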