diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5311,6 +5311,10 @@
     if (Operand.getValueType().getScalarType() == MVT::i1)
       return getNOT(DL, Operand, Operand.getValueType());
     break;
+  case ISD::VECREDUCE_ADD:
+    if (Operand.getValueType().getScalarType() == MVT::i1)
+      return getNode(ISD::VECREDUCE_XOR, DL, VT, Operand);
+    break;
   case ISD::VECREDUCE_SMIN:
   case ISD::VECREDUCE_UMAX:
     if (Operand.getValueType().getScalarType() == MVT::i1)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
--- a/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
@@ -127,8 +127,14 @@
 define i1 @test_v4i1(<4 x i1> %a) nounwind {
 ; CHECK-LABEL: test_v4i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    addv h0, v0.4h
-; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    umov w8, v0.h[1]
+; CHECK-NEXT:    umov w9, v0.h[0]
+; CHECK-NEXT:    umov w10, v0.h[2]
+; CHECK-NEXT:    umov w11, v0.h[3]
+; CHECK-NEXT:    eor w8, w9, w8
+; CHECK-NEXT:    eor w8, w8, w10
+; CHECK-NEXT:    eor w8, w8, w11
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
   %b = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> %a)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
@@ -875,3 +875,125 @@
   %red = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> %v)
   ret i1 %red
 }
+
+declare i1 @llvm.vector.reduce.add.v1i1(<1 x i1>)
+
+define signext i1 @vreduce_add_v1i1(<1 x i1> %v) {
+; CHECK-LABEL: vreduce_add_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.add.v2i1(<2 x i1>)
+
+define signext i1 @vreduce_add_v2i1(<2 x i1> %v) {
+; CHECK-LABEL: vreduce_add_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.add.v4i1(<4 x i1>)
+
+define signext i1 @vreduce_add_v4i1(<4 x i1> %v) {
+; CHECK-LABEL: vreduce_add_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.add.v8i1(<8 x i1>)
+
+define signext i1 @vreduce_add_v8i1(<8 x i1> %v) {
+; CHECK-LABEL: vreduce_add_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.add.v16i1(<16 x i1>)
+
+define signext i1 @vreduce_add_v16i1(<16 x i1> %v) {
+; CHECK-LABEL: vreduce_add_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.add.v32i1(<32 x i1>)
+
+define signext i1 @vreduce_add_v32i1(<32 x i1> %v) {
+; LMULMAX1-LABEL: vreduce_add_v32i1:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; LMULMAX1-NEXT:    vmxor.mm v8, v0, v8
+; LMULMAX1-NEXT:    vcpop.m a0, v8
+; LMULMAX1-NEXT:    andi a0, a0, 1
+; LMULMAX1-NEXT:    neg a0, a0
+; LMULMAX1-NEXT:    ret
+;
+; LMULMAX8-LABEL: vreduce_add_v32i1:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    li a0, 32
+; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
+; LMULMAX8-NEXT:    vcpop.m a0, v0
+; LMULMAX8-NEXT:    andi a0, a0, 1
+; LMULMAX8-NEXT:    neg a0, a0
+; LMULMAX8-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.add.v64i1(<64 x i1>)
+
+define signext i1 @vreduce_add_v64i1(<64 x i1> %v) {
+; LMULMAX1-LABEL: vreduce_add_v64i1:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; LMULMAX1-NEXT:    vmxor.mm v8, v8, v10
+; LMULMAX1-NEXT:    vmxor.mm v9, v0, v9
+; LMULMAX1-NEXT:    vmxor.mm v8, v9, v8
+; LMULMAX1-NEXT:    vcpop.m a0, v8
+; LMULMAX1-NEXT:    andi a0, a0, 1
+; LMULMAX1-NEXT:    neg a0, a0
+; LMULMAX1-NEXT:    ret
+;
+; LMULMAX8-LABEL: vreduce_add_v64i1:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    li a0, 64
+; LMULMAX8-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
+; LMULMAX8-NEXT:    vcpop.m a0, v0
+; LMULMAX8-NEXT:    andi a0, a0, 1
+; LMULMAX8-NEXT:    neg a0, a0
+; LMULMAX8-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> %v)
+  ret i1 %red
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll
@@ -708,3 +708,101 @@
   %red = call i1 @llvm.vector.reduce.smin.nxv64i1(<vscale x 64 x i1> %v)
   ret i1 %red
 }
+
+declare i1 @llvm.vector.reduce.add.nxv1i1(<vscale x 1 x i1>)
+
+define signext i1 @vreduce_add_nxv1i1(<vscale x 1 x i1> %v) {
+; CHECK-LABEL: vreduce_add_nxv1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.add.nxv1i1(<vscale x 1 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.add.nxv2i1(<vscale x 2 x i1>)
+
+define signext i1 @vreduce_add_nxv2i1(<vscale x 2 x i1> %v) {
+; CHECK-LABEL: vreduce_add_nxv2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.add.nxv2i1(<vscale x 2 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.add.nxv4i1(<vscale x 4 x i1>)
+
+define signext i1 @vreduce_add_nxv4i1(<vscale x 4 x i1> %v) {
+; CHECK-LABEL: vreduce_add_nxv4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.add.nxv4i1(<vscale x 4 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.add.nxv8i1(<vscale x 8 x i1>)
+
+define signext i1 @vreduce_add_nxv8i1(<vscale x 8 x i1> %v) {
+; CHECK-LABEL: vreduce_add_nxv8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.add.nxv8i1(<vscale x 8 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.add.nxv16i1(<vscale x 16 x i1>)
+
+define signext i1 @vreduce_add_nxv16i1(<vscale x 16 x i1> %v) {
+; CHECK-LABEL: vreduce_add_nxv16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.add.nxv16i1(<vscale x 16 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.add.nxv32i1(<vscale x 32 x i1>)
+
+define signext i1 @vreduce_add_nxv32i1(<vscale x 32 x i1> %v) {
+; CHECK-LABEL: vreduce_add_nxv32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, mu
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.add.nxv32i1(<vscale x 32 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.add.nxv64i1(<vscale x 64 x i1>)
+
+define signext i1 @vreduce_add_nxv64i1(<vscale x 64 x i1> %v) {
+; CHECK-LABEL: vreduce_add_nxv64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, mu
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.add.nxv64i1(<vscale x 64 x i1> %v)
+  ret i1 %red
+}
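
Note (reviewer context, not part of the patch): an i1 add wraps modulo 2, so a
vector.reduce.add over i1 lanes computes the parity of the mask, which is
exactly vector.reduce.xor. That is the identity the new VECREDUCE_ADD case in
SelectionDAG.cpp relies on, and it is why the RISC-V checks above lower to
vcpop.m (count the set mask bits), andi a0, a0, 1 (keep only the parity bit),
and neg (sign-extend the signext i1 result to 0 or -1). Below is a minimal
standalone C++ sketch of the identity, exhaustive over all <4 x i1> inputs;
the file name and variable names are illustrative, not taken from the tree.

// parity_check.cpp: check that an i1 add-reduction equals an xor-reduction.
#include <cassert>

int main() {
  // Enumerate all 16 possible <4 x i1> masks as the low 4 bits of Bits.
  for (unsigned Bits = 0; Bits < 16; ++Bits) {
    unsigned Sum = 0;
    bool Xor = false;
    for (int Lane = 0; Lane < 4; ++Lane) {
      bool B = (Bits >> Lane) & 1;
      Sum += B;  // the add reduction, before truncation back to i1
      Xor ^= B;  // the xor reduction
    }
    // Truncating the sum to i1 keeps only its low bit: the parity.
    assert((Sum & 1u) == static_cast<unsigned>(Xor));
  }
  return 0;
}

The same reasoning accounts for the AArch64 test change: once the generic
combine turns the add-reduction into an xor-reduction, the backend emits a
chain of eor instructions over the extracted lanes instead of addv, with the
final and w0, w8, #0x1 truncating the result back to i1.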