diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll @@ -6,7 +6,7 @@ declare i1 @llvm.vp.reduce.and.v1i1(i1, <1 x i1>, <1 x i1>, i32) -define signext i1 @vpreduce_and_v1i1(i1 signext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_and_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -15,7 +15,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.and.v1i1(i1 %s, <1 x i1> %v, <1 x i1> %m, i32 %evl) ret i1 %r @@ -23,65 +22,39 @@ declare i1 @llvm.vp.reduce.or.v1i1(i1, <1 x i1>, <1 x i1>, i32) -define signext i1 @vpreduce_or_v1i1(i1 signext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_or_v1i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_or_v1i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_or_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_or_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.or.v1i1(i1 %s, <1 x i1> %v, <1 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.xor.v1i1(i1, <1 x i1>, <1 x i1>, i32) -define signext i1 @vpreduce_xor_v1i1(i1 signext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_xor_v1i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_xor_v1i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_xor_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_xor_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.v1i1(i1 %s, <1 x i1> %v, <1 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.and.v2i1(i1, <2 x i1>, <2 x i1>, i32) -define signext i1 @vpreduce_and_v2i1(i1 signext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_and_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -90,7 +63,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.and.v2i1(i1 %s, <2 x i1> %v, <2 x i1> %m, i32 %evl) ret i1 %r @@ -98,65 +70,39 @@ declare i1 @llvm.vp.reduce.or.v2i1(i1, <2 x i1>, <2 x i1>, i32) -define signext i1 @vpreduce_or_v2i1(i1 signext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_or_v2i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_or_v2i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_or_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_or_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.or.v2i1(i1 %s, <2 x i1> %v, <2 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.xor.v2i1(i1, <2 x i1>, <2 x i1>, i32) -define signext i1 @vpreduce_xor_v2i1(i1 signext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_xor_v2i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_xor_v2i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_xor_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_xor_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.v2i1(i1 %s, <2 x i1> %v, <2 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.and.v4i1(i1, <4 x i1>, <4 x i1>, i32) -define signext i1 @vpreduce_and_v4i1(i1 signext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_and_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -165,7 +111,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.and.v4i1(i1 %s, <4 x i1> %v, <4 x i1> %m, i32 %evl) ret i1 %r @@ -173,65 +118,39 @@ declare i1 @llvm.vp.reduce.or.v4i1(i1, <4 x i1>, <4 x i1>, i32) -define signext i1 @vpreduce_or_v4i1(i1 signext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_or_v4i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_or_v4i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_or_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_or_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.or.v4i1(i1 %s, <4 x i1> %v, <4 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.xor.v4i1(i1, <4 x i1>, <4 x i1>, i32) -define signext i1 @vpreduce_xor_v4i1(i1 signext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_xor_v4i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_xor_v4i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_xor_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_xor_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.v4i1(i1 %s, <4 x i1> %v, <4 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.and.v8i1(i1, <8 x i1>, <8 x i1>, i32) -define signext i1 @vpreduce_and_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_and_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -240,7 +159,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.and.v8i1(i1 %s, <8 x i1> %v, <8 x i1> %m, i32 %evl) ret i1 %r @@ -248,65 +166,39 @@ declare i1 @llvm.vp.reduce.or.v8i1(i1, <8 x i1>, <8 x i1>, i32) -define signext i1 @vpreduce_or_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_or_v8i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_or_v8i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_or_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_or_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.or.v8i1(i1 %s, <8 x i1> %v, <8 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.xor.v8i1(i1, <8 x i1>, <8 x i1>, i32) -define signext i1 @vpreduce_xor_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_xor_v8i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_xor_v8i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_xor_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_xor_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.v8i1(i1 %s, <8 x i1> %v, <8 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.and.v10i1(i1, <10 x i1>, <10 x i1>, i32) -define signext i1 @vpreduce_and_v10i1(i1 signext %s, <10 x i1> %v, <10 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_and_v10i1(i1 zeroext %s, <10 x i1> %v, <10 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v10i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -315,7 +207,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.and.v10i1(i1 %s, <10 x i1> %v, <10 x i1> %m, i32 %evl) ret i1 %r @@ -323,7 +214,7 @@ declare i1 @llvm.vp.reduce.and.v16i1(i1, <16 x i1>, <16 x i1>, i32) -define signext i1 @vpreduce_and_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_and_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -332,7 +223,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.and.v16i1(i1 %s, <16 x i1> %v, <16 x i1> %m, i32 %evl) ret i1 %r @@ -340,7 +230,7 @@ declare i1 @llvm.vp.reduce.and.v256i1(i1, <256 x i1>, <256 x i1>, i32) -define signext i1 @vpreduce_and_v256i1(i1 signext %s, <256 x i1> %v, <256 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_and_v256i1(i1 zeroext %s, <256 x i1> %v, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v256i1: ; CHECK: # %bb.0: ; CHECK-NEXT: li a3, 128 @@ -365,7 +255,6 @@ ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.and.v256i1(i1 %s, <256 x i1> %v, <256 x i1> %m, i32 %evl) ret i1 %r @@ -373,205 +262,119 @@ declare i1 @llvm.vp.reduce.or.v16i1(i1, <16 x i1>, <16 x i1>, i32) -define signext i1 @vpreduce_or_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_or_v16i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_or_v16i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_or_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_or_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.or.v16i1(i1 %s, <16 x i1> %v, <16 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.xor.v16i1(i1, <16 x i1>, <16 x i1>, i32) -define signext i1 @vpreduce_xor_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_xor_v16i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_xor_v16i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_xor_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_xor_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.v16i1(i1 %s, <16 x i1> %v, <16 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.add.v1i1(i1, <1 x i1>, <1 x i1>, i32) -define signext i1 @vpreduce_add_v1i1(i1 signext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_add_v1i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_add_v1i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_add_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_add_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.add.v1i1(i1 %s, <1 x i1> %v, <1 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.add.v2i1(i1, <2 x i1>, <2 x i1>, i32) -define signext i1 @vpreduce_add_v2i1(i1 signext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_add_v2i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_add_v2i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_add_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_add_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.add.v2i1(i1 %s, <2 x i1> %v, <2 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.add.v4i1(i1, <4 x i1>, <4 x i1>, i32) -define signext i1 @vpreduce_add_v4i1(i1 signext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_add_v4i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_add_v4i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_add_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_add_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.add.v4i1(i1 %s, <4 x i1> %v, <4 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.add.v8i1(i1, <8 x i1>, <8 x i1>, i32) -define signext i1 @vpreduce_add_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_add_v8i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_add_v8i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_add_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_add_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.add.v8i1(i1 %s, <8 x i1> %v, <8 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.add.v16i1(i1, <16 x i1>, <16 x i1>, i32) -define signext i1 @vpreduce_add_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_add_v16i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_add_v16i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_add_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_add_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.add.v16i1(i1 %s, <16 x i1> %v, <16 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.smax.v1i1(i1, <1 x i1>, <1 x i1>, i32) -define signext i1 @vpreduce_smax_v1i1(i1 signext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_smax_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -580,7 +383,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smax.v1i1(i1 %s, <1 x i1> %v, <1 x i1> %m, i32 %evl) ret i1 %r @@ -588,7 +390,7 @@ declare i1 @llvm.vp.reduce.smax.v2i1(i1, <2 x i1>, <2 x i1>, i32) -define signext i1 @vpreduce_smax_v2i1(i1 signext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_smax_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -597,7 +399,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smax.v2i1(i1 %s, <2 x i1> %v, <2 x i1> %m, i32 %evl) ret i1 %r @@ -605,7 +406,7 @@ declare i1 @llvm.vp.reduce.smax.v4i1(i1, <4 x i1>, <4 x i1>, i32) -define signext i1 @vpreduce_smax_v4i1(i1 signext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_smax_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -614,7 +415,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smax.v4i1(i1 %s, <4 x i1> %v, <4 x i1> %m, i32 %evl) ret i1 %r @@ -622,7 +422,7 @@ declare i1 @llvm.vp.reduce.smax.v8i1(i1, <8 x i1>, <8 x i1>, i32) -define signext i1 @vpreduce_smax_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_smax_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -631,7 +431,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smax.v8i1(i1 %s, <8 x i1> %v, <8 x i1> %m, i32 %evl) ret i1 %r @@ -639,7 +438,7 @@ declare i1 @llvm.vp.reduce.smax.v16i1(i1, <16 x i1>, <16 x i1>, i32) -define signext i1 @vpreduce_smax_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_smax_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -648,7 +447,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smax.v16i1(i1 %s, <16 x i1> %v, <16 x i1> %m, i32 %evl) ret i1 %r @@ -656,7 +454,7 @@ declare i1 @llvm.vp.reduce.smax.v32i1(i1, <32 x i1>, <32 x i1>, i32) -define signext i1 @vpreduce_smax_v32i1(i1 signext %s, <32 x i1> %v, <32 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_smax_v32i1(i1 zeroext %s, <32 x i1> %v, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -665,7 +463,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smax.v32i1(i1 %s, <32 x i1> %v, <32 x i1> %m, i32 %evl) ret i1 %r @@ -673,7 +470,7 @@ declare i1 @llvm.vp.reduce.smax.v64i1(i1, <64 x i1>, <64 x i1>, i32) -define signext i1 @vpreduce_smax_v64i1(i1 signext %s, <64 x i1> %v, <64 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_smax_v64i1(i1 zeroext %s, <64 x i1> %v, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -682,7 +479,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smax.v64i1(i1 %s, <64 x i1> %v, <64 x i1> %m, i32 %evl) ret i1 %r @@ -690,427 +486,231 @@ declare i1 @llvm.vp.reduce.smin.v1i1(i1, <1 x i1>, <1 x i1>, i32) -define signext i1 @vpreduce_smin_v1i1(i1 signext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_smin_v1i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_smin_v1i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_smin_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_smin_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.v1i1(i1 %s, <1 x i1> %v, <1 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.smin.v2i1(i1, <2 x i1>, <2 x i1>, i32) -define signext i1 @vpreduce_smin_v2i1(i1 signext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_smin_v2i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_smin_v2i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_smin_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_smin_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.v2i1(i1 %s, <2 x i1> %v, <2 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.smin.v4i1(i1, <4 x i1>, <4 x i1>, i32) -define signext i1 @vpreduce_smin_v4i1(i1 signext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_smin_v4i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_smin_v4i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_smin_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_smin_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.v4i1(i1 %s, <4 x i1> %v, <4 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.smin.v8i1(i1, <8 x i1>, <8 x i1>, i32) -define signext i1 @vpreduce_smin_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_smin_v8i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_smin_v8i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_smin_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_smin_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.v8i1(i1 %s, <8 x i1> %v, <8 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.smin.v16i1(i1, <16 x i1>, <16 x i1>, i32) -define signext i1 @vpreduce_smin_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_smin_v16i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_smin_v16i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_smin_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_smin_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.v16i1(i1 %s, <16 x i1> %v, <16 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.smin.v32i1(i1, <32 x i1>, <32 x i1>, i32) -define signext i1 @vpreduce_smin_v32i1(i1 signext %s, <32 x i1> %v, <32 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_smin_v32i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_smin_v32i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_smin_v32i1(i1 zeroext %s, <32 x i1> %v, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_smin_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.v32i1(i1 %s, <32 x i1> %v, <32 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.smin.v64i1(i1, <64 x i1>, <64 x i1>, i32) -define signext i1 @vpreduce_smin_v64i1(i1 signext %s, <64 x i1> %v, <64 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_smin_v64i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_smin_v64i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_smin_v64i1(i1 zeroext %s, <64 x i1> %v, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_smin_v64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.v64i1(i1 %s, <64 x i1> %v, <64 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.umax.v1i1(i1, <1 x i1>, <1 x i1>, i32) -define signext i1 @vpreduce_umax_v1i1(i1 signext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_v1i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_v1i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_umax_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_umax_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.v1i1(i1 %s, <1 x i1> %v, <1 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.umax.v2i1(i1, <2 x i1>, <2 x i1>, i32) -define signext i1 @vpreduce_umax_v2i1(i1 signext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_v2i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_v2i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_umax_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_umax_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.v2i1(i1 %s, <2 x i1> %v, <2 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.umax.v4i1(i1, <4 x i1>, <4 x i1>, i32) -define signext i1 @vpreduce_umax_v4i1(i1 signext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_v4i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_v4i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_umax_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_umax_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.v4i1(i1 %s, <4 x i1> %v, <4 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.umax.v8i1(i1, <8 x i1>, <8 x i1>, i32) -define signext i1 @vpreduce_umax_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_v8i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_v8i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_umax_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_umax_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.v8i1(i1 %s, <8 x i1> %v, <8 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.umax.v16i1(i1, <16 x i1>, <16 x i1>, i32) -define signext i1 @vpreduce_umax_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_v16i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_v16i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_umax_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_umax_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.v16i1(i1 %s, <16 x i1> %v, <16 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.umax.v32i1(i1, <32 x i1>, <32 x i1>, i32) -define signext i1 @vpreduce_umax_v32i1(i1 signext %s, <32 x i1> %v, <32 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_v32i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_v32i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_umax_v32i1(i1 zeroext %s, <32 x i1> %v, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_umax_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.v32i1(i1 %s, <32 x i1> %v, <32 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.umax.v64i1(i1, <64 x i1>, <64 x i1>, i32) -define signext i1 @vpreduce_umax_v64i1(i1 signext %s, <64 x i1> %v, <64 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_v64i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_v64i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_umax_v64i1(i1 zeroext %s, <64 x i1> %v, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_umax_v64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.v64i1(i1 %s, <64 x i1> %v, <64 x i1> %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.umin.v1i1(i1, <1 x i1>, <1 x i1>, i32) -define signext i1 @vpreduce_umin_v1i1(i1 signext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_umin_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -1119,7 +719,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umin.v1i1(i1 %s, <1 x i1> %v, <1 x i1> %m, i32 %evl) ret i1 %r @@ -1127,7 +726,7 @@ declare i1 @llvm.vp.reduce.umin.v2i1(i1, <2 x i1>, <2 x i1>, i32) -define signext i1 @vpreduce_umin_v2i1(i1 signext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_umin_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -1136,7 +735,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umin.v2i1(i1 %s, <2 x i1> %v, <2 x i1> %m, i32 %evl) ret i1 %r @@ -1144,7 +742,7 @@ declare i1 @llvm.vp.reduce.umin.v4i1(i1, <4 x i1>, <4 x i1>, i32) -define signext i1 @vpreduce_umin_v4i1(i1 signext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_umin_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -1153,7 +751,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umin.v4i1(i1 %s, <4 x i1> %v, <4 x i1> %m, i32 %evl) ret i1 %r @@ -1161,7 +758,7 @@ declare i1 @llvm.vp.reduce.umin.v8i1(i1, <8 x i1>, <8 x i1>, i32) -define signext i1 @vpreduce_umin_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_umin_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -1170,7 +767,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umin.v8i1(i1 %s, <8 x i1> %v, <8 x i1> %m, i32 %evl) ret i1 %r @@ -1178,7 +774,7 @@ declare i1 @llvm.vp.reduce.umin.v16i1(i1, <16 x i1>, <16 x i1>, i32) -define signext i1 @vpreduce_umin_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_umin_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -1187,7 +783,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umin.v16i1(i1 %s, <16 x i1> %v, <16 x i1> %m, i32 %evl) ret i1 %r @@ -1195,7 +790,7 @@ declare i1 @llvm.vp.reduce.umin.v32i1(i1, <32 x i1>, <32 x i1>, i32) -define signext i1 @vpreduce_umin_v32i1(i1 signext %s, <32 x i1> %v, <32 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_umin_v32i1(i1 zeroext %s, <32 x i1> %v, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_v32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -1204,7 +799,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umin.v32i1(i1 %s, <32 x i1> %v, <32 x i1> %m, i32 %evl) ret i1 %r @@ -1212,7 +806,7 @@ declare i1 @llvm.vp.reduce.umin.v64i1(i1, <64 x i1>, <64 x i1>, i32) -define signext i1 @vpreduce_umin_v64i1(i1 signext %s, <64 x i1> %v, <64 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_umin_v64i1(i1 zeroext %s, <64 x i1> %v, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_v64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -1221,7 +815,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umin.v64i1(i1 %s, <64 x i1> %v, <64 x i1> %m, i32 %evl) ret i1 %r @@ -1245,7 +838,7 @@ declare i1 @llvm.vp.reduce.mul.v2i1(i1, <2 x i1>, <2 x i1>, i32) -define signext i1 @vpreduce_mul_v2i1(i1 signext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_mul_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_mul_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -1254,7 +847,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.mul.v2i1(i1 %s, <2 x i1> %v, <2 x i1> %m, i32 %evl) ret i1 %r @@ -1262,7 +854,7 @@ declare i1 @llvm.vp.reduce.mul.v4i1(i1, <4 x i1>, <4 x i1>, i32) -define signext i1 @vpreduce_mul_v4i1(i1 signext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_mul_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_mul_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -1271,7 +863,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.mul.v4i1(i1 %s, <4 x i1> %v, <4 x i1> %m, i32 %evl) ret i1 %r @@ -1279,7 +870,7 @@ declare i1 @llvm.vp.reduce.mul.v8i1(i1, <8 x i1>, <8 x i1>, i32) -define signext i1 @vpreduce_mul_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_mul_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_mul_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -1288,7 +879,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.mul.v8i1(i1 %s, <8 x i1> %v, <8 x i1> %m, i32 %evl) ret i1 %r @@ -1296,7 +886,7 @@ declare i1 @llvm.vp.reduce.mul.v16i1(i1, <16 x i1>, <16 x i1>, i32) -define signext i1 @vpreduce_mul_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_mul_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_mul_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -1305,7 +895,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.mul.v16i1(i1 %s, <16 x i1> %v, <16 x i1> %m, i32 %evl) ret i1 %r @@ -1313,7 +902,7 @@ declare i1 @llvm.vp.reduce.mul.v32i1(i1, <32 x i1>, <32 x i1>, i32) -define signext i1 @vpreduce_mul_v32i1(i1 signext %s, <32 x i1> %v, <32 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_mul_v32i1(i1 zeroext %s, <32 x i1> %v, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_mul_v32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -1322,7 +911,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.mul.v32i1(i1 %s, <32 x i1> %v, <32 x i1> %m, i32 %evl) ret i1 %r @@ -1330,7 +918,7 @@ declare i1 @llvm.vp.reduce.mul.v64i1(i1, <64 x i1>, <64 x i1>, i32) -define signext i1 @vpreduce_mul_v64i1(i1 signext %s, <64 x i1> %v, <64 x i1> %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_mul_v64i1(i1 zeroext %s, <64 x i1> %v, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_mul_v64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -1339,8 +927,10 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.mul.v64i1(i1 %s, <64 x i1> %v, <64 x i1> %m, i32 %evl) ret i1 %r } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll @@ -6,13 +6,12 @@ declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1>) -define signext i1 @vreduce_or_v1i1(<1 x i1> %v) { +define zeroext i1 @vreduce_or_v1i1(<1 x i1> %v) { ; CHECK-LABEL: vreduce_or_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %v) ret i1 %red @@ -20,13 +19,12 @@ declare i1 @llvm.vector.reduce.xor.v1i1(<1 x i1>) -define signext i1 @vreduce_xor_v1i1(<1 x i1> %v) { +define zeroext i1 @vreduce_xor_v1i1(<1 x i1> %v) { ; CHECK-LABEL: vreduce_xor_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %v) ret i1 %red @@ -34,13 +32,12 @@ declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1>) -define signext i1 @vreduce_and_v1i1(<1 x i1> %v) { +define zeroext i1 @vreduce_and_v1i1(<1 x i1> %v) { ; CHECK-LABEL: vreduce_and_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %v) ret i1 %red @@ -48,13 +45,12 @@ declare i1 @llvm.vector.reduce.umax.v1i1(<1 x i1>) -define signext i1 @vreduce_umax_v1i1(<1 x i1> %v) { +define zeroext i1 @vreduce_umax_v1i1(<1 x i1> %v) { ; CHECK-LABEL: vreduce_umax_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> %v) ret i1 %red @@ -62,13 +58,12 @@ declare i1 @llvm.vector.reduce.smax.v1i1(<1 x i1>) -define signext i1 @vreduce_smax_v1i1(<1 x i1> %v) { +define zeroext i1 @vreduce_smax_v1i1(<1 x i1> %v) { ; CHECK-LABEL: vreduce_smax_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> %v) ret i1 %red @@ -76,13 +71,12 @@ declare i1 @llvm.vector.reduce.umin.v1i1(<1 x i1>) -define signext i1 @vreduce_umin_v1i1(<1 x i1> %v) { +define zeroext i1 @vreduce_umin_v1i1(<1 x i1> %v) { ; CHECK-LABEL: vreduce_umin_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> %v) ret i1 %red @@ -90,13 +84,12 @@ declare i1 @llvm.vector.reduce.smin.v1i1(<1 x i1>) -define signext i1 @vreduce_smin_v1i1(<1 x i1> %v) { +define zeroext i1 @vreduce_smin_v1i1(<1 x i1> %v) { ; CHECK-LABEL: vreduce_smin_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.v1i1(<1 x i1> %v) ret i1 %red @@ -104,13 +97,12 @@ declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>) -define signext i1 @vreduce_or_v2i1(<2 x i1> %v) { +define zeroext i1 @vreduce_or_v2i1(<2 x i1> %v) { ; CHECK-LABEL: vreduce_or_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %v) ret i1 %red @@ -118,52 +110,26 @@ declare i1 @llvm.vector.reduce.xor.v2i1(<2 x i1>) -define signext i1 @vreduce_xor_v2i1(<2 x i1> %v) { -; LMULMAX1-RV32-LABEL: vreduce_xor_v2i1: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX1-RV32-NEXT: slli a0, a0, 31 -; LMULMAX1-RV32-NEXT: srai a0, a0, 31 -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: vreduce_xor_v2i1: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX1-RV64-NEXT: slli a0, a0, 63 -; LMULMAX1-RV64-NEXT: srai a0, a0, 63 -; LMULMAX1-RV64-NEXT: ret -; -; LMULMAX8-RV32-LABEL: vreduce_xor_v2i1: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX8-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV32-NEXT: slli a0, a0, 31 -; LMULMAX8-RV32-NEXT: srai a0, a0, 31 -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: vreduce_xor_v2i1: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX8-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV64-NEXT: slli a0, a0, 63 -; LMULMAX8-RV64-NEXT: srai a0, a0, 63 -; LMULMAX8-RV64-NEXT: ret +define zeroext i1 @vreduce_xor_v2i1(<2 x i1> %v) { +; CHECK-LABEL: vreduce_xor_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %v) ret i1 %red } declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>) -define signext i1 @vreduce_and_v2i1(<2 x i1> %v) { +define zeroext i1 @vreduce_and_v2i1(<2 x i1> %v) { ; CHECK-LABEL: vreduce_and_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %v) ret i1 %red @@ -171,13 +137,12 @@ declare i1 @llvm.vector.reduce.umax.v2i1(<2 x i1>) -define signext i1 @vreduce_umax_v2i1(<2 x i1> %v) { +define zeroext i1 @vreduce_umax_v2i1(<2 x i1> %v) { ; CHECK-LABEL: vreduce_umax_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.v2i1(<2 x i1> %v) ret i1 %red @@ -185,14 +150,13 @@ declare i1 @llvm.vector.reduce.smax.v2i1(<2 x i1>) -define signext i1 @vreduce_smax_v2i1(<2 x i1> %v) { +define zeroext i1 @vreduce_smax_v2i1(<2 x i1> %v) { ; CHECK-LABEL: vreduce_smax_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.v2i1(<2 x i1> %v) ret i1 %red @@ -200,14 +164,13 @@ declare i1 @llvm.vector.reduce.umin.v2i1(<2 x i1>) -define signext i1 @vreduce_umin_v2i1(<2 x i1> %v) { +define zeroext i1 @vreduce_umin_v2i1(<2 x i1> %v) { ; CHECK-LABEL: vreduce_umin_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.v2i1(<2 x i1> %v) ret i1 %red @@ -215,13 +178,12 @@ declare i1 @llvm.vector.reduce.smin.v2i1(<2 x i1>) -define signext i1 @vreduce_smin_v2i1(<2 x i1> %v) { +define zeroext i1 @vreduce_smin_v2i1(<2 x i1> %v) { ; CHECK-LABEL: vreduce_smin_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.v2i1(<2 x i1> %v) ret i1 %red @@ -229,13 +191,12 @@ declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>) -define signext i1 @vreduce_or_v4i1(<4 x i1> %v) { +define zeroext i1 @vreduce_or_v4i1(<4 x i1> %v) { ; CHECK-LABEL: vreduce_or_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %v) ret i1 %red @@ -243,52 +204,26 @@ declare i1 @llvm.vector.reduce.xor.v4i1(<4 x i1>) -define signext i1 @vreduce_xor_v4i1(<4 x i1> %v) { -; LMULMAX1-RV32-LABEL: vreduce_xor_v4i1: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX1-RV32-NEXT: slli a0, a0, 31 -; LMULMAX1-RV32-NEXT: srai a0, a0, 31 -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: vreduce_xor_v4i1: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX1-RV64-NEXT: slli a0, a0, 63 -; LMULMAX1-RV64-NEXT: srai a0, a0, 63 -; LMULMAX1-RV64-NEXT: ret -; -; LMULMAX8-RV32-LABEL: vreduce_xor_v4i1: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX8-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV32-NEXT: slli a0, a0, 31 -; LMULMAX8-RV32-NEXT: srai a0, a0, 31 -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: vreduce_xor_v4i1: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX8-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV64-NEXT: slli a0, a0, 63 -; LMULMAX8-RV64-NEXT: srai a0, a0, 63 -; LMULMAX8-RV64-NEXT: ret +define zeroext i1 @vreduce_xor_v4i1(<4 x i1> %v) { +; CHECK-LABEL: vreduce_xor_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %v) ret i1 %red } declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>) -define signext i1 @vreduce_and_v4i1(<4 x i1> %v) { +define zeroext i1 @vreduce_and_v4i1(<4 x i1> %v) { ; CHECK-LABEL: vreduce_and_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %v) ret i1 %red @@ -296,13 +231,12 @@ declare i1 @llvm.vector.reduce.umax.v4i1(<4 x i1>) -define signext i1 @vreduce_umax_v4i1(<4 x i1> %v) { +define zeroext i1 @vreduce_umax_v4i1(<4 x i1> %v) { ; CHECK-LABEL: vreduce_umax_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> %v) ret i1 %red @@ -310,14 +244,13 @@ declare i1 @llvm.vector.reduce.smax.v4i1(<4 x i1>) -define signext i1 @vreduce_smax_v4i1(<4 x i1> %v) { +define zeroext i1 @vreduce_smax_v4i1(<4 x i1> %v) { ; CHECK-LABEL: vreduce_smax_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.v4i1(<4 x i1> %v) ret i1 %red @@ -325,14 +258,13 @@ declare i1 @llvm.vector.reduce.umin.v4i1(<4 x i1>) -define signext i1 @vreduce_umin_v4i1(<4 x i1> %v) { +define zeroext i1 @vreduce_umin_v4i1(<4 x i1> %v) { ; CHECK-LABEL: vreduce_umin_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.v4i1(<4 x i1> %v) ret i1 %red @@ -340,13 +272,12 @@ declare i1 @llvm.vector.reduce.smin.v4i1(<4 x i1>) -define signext i1 @vreduce_smin_v4i1(<4 x i1> %v) { +define zeroext i1 @vreduce_smin_v4i1(<4 x i1> %v) { ; CHECK-LABEL: vreduce_smin_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.v4i1(<4 x i1> %v) ret i1 %red @@ -354,13 +285,12 @@ declare i1 @llvm.vector.reduce.or.v8i1(<8 x i1>) -define signext i1 @vreduce_or_v8i1(<8 x i1> %v) { +define zeroext i1 @vreduce_or_v8i1(<8 x i1> %v) { ; CHECK-LABEL: vreduce_or_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %v) ret i1 %red @@ -368,52 +298,26 @@ declare i1 @llvm.vector.reduce.xor.v8i1(<8 x i1>) -define signext i1 @vreduce_xor_v8i1(<8 x i1> %v) { -; LMULMAX1-RV32-LABEL: vreduce_xor_v8i1: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX1-RV32-NEXT: slli a0, a0, 31 -; LMULMAX1-RV32-NEXT: srai a0, a0, 31 -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: vreduce_xor_v8i1: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX1-RV64-NEXT: slli a0, a0, 63 -; LMULMAX1-RV64-NEXT: srai a0, a0, 63 -; LMULMAX1-RV64-NEXT: ret -; -; LMULMAX8-RV32-LABEL: vreduce_xor_v8i1: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX8-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV32-NEXT: slli a0, a0, 31 -; LMULMAX8-RV32-NEXT: srai a0, a0, 31 -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: vreduce_xor_v8i1: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX8-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV64-NEXT: slli a0, a0, 63 -; LMULMAX8-RV64-NEXT: srai a0, a0, 63 -; LMULMAX8-RV64-NEXT: ret +define zeroext i1 @vreduce_xor_v8i1(<8 x i1> %v) { +; CHECK-LABEL: vreduce_xor_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %v) ret i1 %red } declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>) -define signext i1 @vreduce_and_v8i1(<8 x i1> %v) { +define zeroext i1 @vreduce_and_v8i1(<8 x i1> %v) { ; CHECK-LABEL: vreduce_and_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %v) ret i1 %red @@ -421,13 +325,12 @@ declare i1 @llvm.vector.reduce.umax.v8i1(<8 x i1>) -define signext i1 @vreduce_umax_v8i1(<8 x i1> %v) { +define zeroext i1 @vreduce_umax_v8i1(<8 x i1> %v) { ; CHECK-LABEL: vreduce_umax_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.v8i1(<8 x i1> %v) ret i1 %red @@ -435,14 +338,13 @@ declare i1 @llvm.vector.reduce.smax.v8i1(<8 x i1>) -define signext i1 @vreduce_smax_v8i1(<8 x i1> %v) { +define zeroext i1 @vreduce_smax_v8i1(<8 x i1> %v) { ; CHECK-LABEL: vreduce_smax_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.v8i1(<8 x i1> %v) ret i1 %red @@ -450,14 +352,13 @@ declare i1 @llvm.vector.reduce.umin.v8i1(<8 x i1>) -define signext i1 @vreduce_umin_v8i1(<8 x i1> %v) { +define zeroext i1 @vreduce_umin_v8i1(<8 x i1> %v) { ; CHECK-LABEL: vreduce_umin_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.v8i1(<8 x i1> %v) ret i1 %red @@ -465,13 +366,12 @@ declare i1 @llvm.vector.reduce.smin.v8i1(<8 x i1>) -define signext i1 @vreduce_smin_v8i1(<8 x i1> %v) { +define zeroext i1 @vreduce_smin_v8i1(<8 x i1> %v) { ; CHECK-LABEL: vreduce_smin_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.v8i1(<8 x i1> %v) ret i1 %red @@ -479,13 +379,12 @@ declare i1 @llvm.vector.reduce.or.v16i1(<16 x i1>) -define signext i1 @vreduce_or_v16i1(<16 x i1> %v) { +define zeroext i1 @vreduce_or_v16i1(<16 x i1> %v) { ; CHECK-LABEL: vreduce_or_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %v) ret i1 %red @@ -493,52 +392,26 @@ declare i1 @llvm.vector.reduce.xor.v16i1(<16 x i1>) -define signext i1 @vreduce_xor_v16i1(<16 x i1> %v) { -; LMULMAX1-RV32-LABEL: vreduce_xor_v16i1: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX1-RV32-NEXT: slli a0, a0, 31 -; LMULMAX1-RV32-NEXT: srai a0, a0, 31 -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: vreduce_xor_v16i1: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX1-RV64-NEXT: slli a0, a0, 63 -; LMULMAX1-RV64-NEXT: srai a0, a0, 63 -; LMULMAX1-RV64-NEXT: ret -; -; LMULMAX8-RV32-LABEL: vreduce_xor_v16i1: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX8-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV32-NEXT: slli a0, a0, 31 -; LMULMAX8-RV32-NEXT: srai a0, a0, 31 -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: vreduce_xor_v16i1: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX8-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV64-NEXT: slli a0, a0, 63 -; LMULMAX8-RV64-NEXT: srai a0, a0, 63 -; LMULMAX8-RV64-NEXT: ret +define zeroext i1 @vreduce_xor_v16i1(<16 x i1> %v) { +; CHECK-LABEL: vreduce_xor_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %v) ret i1 %red } declare i1 @llvm.vector.reduce.and.v16i1(<16 x i1>) -define signext i1 @vreduce_and_v16i1(<16 x i1> %v) { +define zeroext i1 @vreduce_and_v16i1(<16 x i1> %v) { ; CHECK-LABEL: vreduce_and_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %v) ret i1 %red @@ -546,13 +419,12 @@ declare i1 @llvm.vector.reduce.umax.v16i1(<16 x i1>) -define signext i1 @vreduce_umax_v16i1(<16 x i1> %v) { +define zeroext i1 @vreduce_umax_v16i1(<16 x i1> %v) { ; CHECK-LABEL: vreduce_umax_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.v16i1(<16 x i1> %v) ret i1 %red @@ -560,14 +432,13 @@ declare i1 @llvm.vector.reduce.smax.v16i1(<16 x i1>) -define signext i1 @vreduce_smax_v16i1(<16 x i1> %v) { +define zeroext i1 @vreduce_smax_v16i1(<16 x i1> %v) { ; CHECK-LABEL: vreduce_smax_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.v16i1(<16 x i1> %v) ret i1 %red @@ -575,14 +446,13 @@ declare i1 @llvm.vector.reduce.umin.v16i1(<16 x i1>) -define signext i1 @vreduce_umin_v16i1(<16 x i1> %v) { +define zeroext i1 @vreduce_umin_v16i1(<16 x i1> %v) { ; CHECK-LABEL: vreduce_umin_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.v16i1(<16 x i1> %v) ret i1 %red @@ -590,13 +460,12 @@ declare i1 @llvm.vector.reduce.smin.v16i1(<16 x i1>) -define signext i1 @vreduce_smin_v16i1(<16 x i1> %v) { +define zeroext i1 @vreduce_smin_v16i1(<16 x i1> %v) { ; CHECK-LABEL: vreduce_smin_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.v16i1(<16 x i1> %v) ret i1 %red @@ -604,14 +473,13 @@ declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1>) -define signext i1 @vreduce_or_v32i1(<32 x i1> %v) { +define zeroext i1 @vreduce_or_v32i1(<32 x i1> %v) { ; LMULMAX1-LABEL: vreduce_or_v32i1: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-NEXT: vmor.mm v8, v0, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: seqz a0, a0 -; LMULMAX1-NEXT: addi a0, a0, -1 +; LMULMAX1-NEXT: snez a0, a0 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_or_v32i1: @@ -619,8 +487,7 @@ ; LMULMAX8-NEXT: li a0, 32 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: seqz a0, a0 -; LMULMAX8-NEXT: addi a0, a0, -1 +; LMULMAX8-NEXT: snez a0, a0 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %v) ret i1 %red @@ -628,56 +495,35 @@ declare i1 @llvm.vector.reduce.xor.v32i1(<32 x i1>) -define signext i1 @vreduce_xor_v32i1(<32 x i1> %v) { -; LMULMAX1-RV32-LABEL: vreduce_xor_v32i1: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmxor.mm v8, v0, v8 -; LMULMAX1-RV32-NEXT: vcpop.m a0, v8 -; LMULMAX1-RV32-NEXT: slli a0, a0, 31 -; LMULMAX1-RV32-NEXT: srai a0, a0, 31 -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: vreduce_xor_v32i1: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmxor.mm v8, v0, v8 -; LMULMAX1-RV64-NEXT: vcpop.m a0, v8 -; LMULMAX1-RV64-NEXT: slli a0, a0, 63 -; LMULMAX1-RV64-NEXT: srai a0, a0, 63 -; LMULMAX1-RV64-NEXT: ret -; -; LMULMAX8-RV32-LABEL: vreduce_xor_v32i1: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: li a0, 32 -; LMULMAX8-RV32-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; LMULMAX8-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV32-NEXT: slli a0, a0, 31 -; LMULMAX8-RV32-NEXT: srai a0, a0, 31 -; LMULMAX8-RV32-NEXT: ret +define zeroext i1 @vreduce_xor_v32i1(<32 x i1> %v) { +; LMULMAX1-LABEL: vreduce_xor_v32i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; LMULMAX1-NEXT: vmxor.mm v8, v0, v8 +; LMULMAX1-NEXT: vcpop.m a0, v8 +; LMULMAX1-NEXT: andi a0, a0, 1 +; LMULMAX1-NEXT: ret ; -; LMULMAX8-RV64-LABEL: vreduce_xor_v32i1: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: li a0, 32 -; LMULMAX8-RV64-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; LMULMAX8-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV64-NEXT: slli a0, a0, 63 -; LMULMAX8-RV64-NEXT: srai a0, a0, 63 -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: vreduce_xor_v32i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: li a0, 32 +; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; LMULMAX8-NEXT: vcpop.m a0, v0 +; LMULMAX8-NEXT: andi a0, a0, 1 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %v) ret i1 %red } declare i1 @llvm.vector.reduce.and.v32i1(<32 x i1>) -define signext i1 @vreduce_and_v32i1(<32 x i1> %v) { +define zeroext i1 @vreduce_and_v32i1(<32 x i1> %v) { ; LMULMAX1-LABEL: vreduce_and_v32i1: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-NEXT: vmnand.mm v8, v0, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: snez a0, a0 -; LMULMAX1-NEXT: addi a0, a0, -1 +; LMULMAX1-NEXT: seqz a0, a0 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_and_v32i1: @@ -686,8 +532,7 @@ ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; LMULMAX8-NEXT: vmnot.m v8, v0 ; LMULMAX8-NEXT: vcpop.m a0, v8 -; LMULMAX8-NEXT: snez a0, a0 -; LMULMAX8-NEXT: addi a0, a0, -1 +; LMULMAX8-NEXT: seqz a0, a0 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %v) ret i1 %red @@ -695,14 +540,13 @@ declare i1 @llvm.vector.reduce.umax.v32i1(<32 x i1>) -define signext i1 @vreduce_umax_v32i1(<32 x i1> %v) { +define zeroext i1 @vreduce_umax_v32i1(<32 x i1> %v) { ; LMULMAX1-LABEL: vreduce_umax_v32i1: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-NEXT: vmor.mm v8, v0, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: seqz a0, a0 -; LMULMAX1-NEXT: addi a0, a0, -1 +; LMULMAX1-NEXT: snez a0, a0 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_umax_v32i1: @@ -710,8 +554,7 @@ ; LMULMAX8-NEXT: li a0, 32 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: seqz a0, a0 -; LMULMAX8-NEXT: addi a0, a0, -1 +; LMULMAX8-NEXT: snez a0, a0 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> %v) ret i1 %red @@ -719,14 +562,13 @@ declare i1 @llvm.vector.reduce.smax.v32i1(<32 x i1>) -define signext i1 @vreduce_smax_v32i1(<32 x i1> %v) { +define zeroext i1 @vreduce_smax_v32i1(<32 x i1> %v) { ; LMULMAX1-LABEL: vreduce_smax_v32i1: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-NEXT: vmnand.mm v8, v0, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: snez a0, a0 -; LMULMAX1-NEXT: addi a0, a0, -1 +; LMULMAX1-NEXT: seqz a0, a0 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_smax_v32i1: @@ -735,8 +577,7 @@ ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; LMULMAX8-NEXT: vmnot.m v8, v0 ; LMULMAX8-NEXT: vcpop.m a0, v8 -; LMULMAX8-NEXT: snez a0, a0 -; LMULMAX8-NEXT: addi a0, a0, -1 +; LMULMAX8-NEXT: seqz a0, a0 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> %v) ret i1 %red @@ -744,14 +585,13 @@ declare i1 @llvm.vector.reduce.umin.v32i1(<32 x i1>) -define signext i1 @vreduce_umin_v32i1(<32 x i1> %v) { +define zeroext i1 @vreduce_umin_v32i1(<32 x i1> %v) { ; LMULMAX1-LABEL: vreduce_umin_v32i1: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-NEXT: vmnand.mm v8, v0, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: snez a0, a0 -; LMULMAX1-NEXT: addi a0, a0, -1 +; LMULMAX1-NEXT: seqz a0, a0 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_umin_v32i1: @@ -760,8 +600,7 @@ ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; LMULMAX8-NEXT: vmnot.m v8, v0 ; LMULMAX8-NEXT: vcpop.m a0, v8 -; LMULMAX8-NEXT: snez a0, a0 -; LMULMAX8-NEXT: addi a0, a0, -1 +; LMULMAX8-NEXT: seqz a0, a0 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> %v) ret i1 %red @@ -769,14 +608,13 @@ declare i1 @llvm.vector.reduce.smin.v32i1(<32 x i1>) -define signext i1 @vreduce_smin_v32i1(<32 x i1> %v) { +define zeroext i1 @vreduce_smin_v32i1(<32 x i1> %v) { ; LMULMAX1-LABEL: vreduce_smin_v32i1: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-NEXT: vmor.mm v8, v0, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: seqz a0, a0 -; LMULMAX1-NEXT: addi a0, a0, -1 +; LMULMAX1-NEXT: snez a0, a0 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_smin_v32i1: @@ -784,8 +622,7 @@ ; LMULMAX8-NEXT: li a0, 32 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: seqz a0, a0 -; LMULMAX8-NEXT: addi a0, a0, -1 +; LMULMAX8-NEXT: snez a0, a0 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> %v) ret i1 %red @@ -793,7 +630,7 @@ declare i1 @llvm.vector.reduce.or.v64i1(<64 x i1>) -define signext i1 @vreduce_or_v64i1(<64 x i1> %v) { +define zeroext i1 @vreduce_or_v64i1(<64 x i1> %v) { ; LMULMAX1-LABEL: vreduce_or_v64i1: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma @@ -801,8 +638,7 @@ ; LMULMAX1-NEXT: vmor.mm v9, v0, v9 ; LMULMAX1-NEXT: vmor.mm v8, v9, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: seqz a0, a0 -; LMULMAX1-NEXT: addi a0, a0, -1 +; LMULMAX1-NEXT: snez a0, a0 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_or_v64i1: @@ -810,8 +646,7 @@ ; LMULMAX8-NEXT: li a0, 64 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: seqz a0, a0 -; LMULMAX8-NEXT: addi a0, a0, -1 +; LMULMAX8-NEXT: snez a0, a0 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> %v) ret i1 %red @@ -819,53 +654,31 @@ declare i1 @llvm.vector.reduce.xor.v64i1(<64 x i1>) -define signext i1 @vreduce_xor_v64i1(<64 x i1> %v) { -; LMULMAX1-RV32-LABEL: vreduce_xor_v64i1: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmxor.mm v8, v8, v10 -; LMULMAX1-RV32-NEXT: vmxor.mm v9, v0, v9 -; LMULMAX1-RV32-NEXT: vmxor.mm v8, v9, v8 -; LMULMAX1-RV32-NEXT: vcpop.m a0, v8 -; LMULMAX1-RV32-NEXT: slli a0, a0, 31 -; LMULMAX1-RV32-NEXT: srai a0, a0, 31 -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: vreduce_xor_v64i1: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmxor.mm v8, v8, v10 -; LMULMAX1-RV64-NEXT: vmxor.mm v9, v0, v9 -; LMULMAX1-RV64-NEXT: vmxor.mm v8, v9, v8 -; LMULMAX1-RV64-NEXT: vcpop.m a0, v8 -; LMULMAX1-RV64-NEXT: slli a0, a0, 63 -; LMULMAX1-RV64-NEXT: srai a0, a0, 63 -; LMULMAX1-RV64-NEXT: ret -; -; LMULMAX8-RV32-LABEL: vreduce_xor_v64i1: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: li a0, 64 -; LMULMAX8-RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; LMULMAX8-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV32-NEXT: slli a0, a0, 31 -; LMULMAX8-RV32-NEXT: srai a0, a0, 31 -; LMULMAX8-RV32-NEXT: ret +define zeroext i1 @vreduce_xor_v64i1(<64 x i1> %v) { +; LMULMAX1-LABEL: vreduce_xor_v64i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; LMULMAX1-NEXT: vmxor.mm v8, v8, v10 +; LMULMAX1-NEXT: vmxor.mm v9, v0, v9 +; LMULMAX1-NEXT: vmxor.mm v8, v9, v8 +; LMULMAX1-NEXT: vcpop.m a0, v8 +; LMULMAX1-NEXT: andi a0, a0, 1 +; LMULMAX1-NEXT: ret ; -; LMULMAX8-RV64-LABEL: vreduce_xor_v64i1: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: li a0, 64 -; LMULMAX8-RV64-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; LMULMAX8-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV64-NEXT: slli a0, a0, 63 -; LMULMAX8-RV64-NEXT: srai a0, a0, 63 -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: vreduce_xor_v64i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: li a0, 64 +; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; LMULMAX8-NEXT: vcpop.m a0, v0 +; LMULMAX8-NEXT: andi a0, a0, 1 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %v) ret i1 %red } declare i1 @llvm.vector.reduce.and.v64i1(<64 x i1>) -define signext i1 @vreduce_and_v64i1(<64 x i1> %v) { +define zeroext i1 @vreduce_and_v64i1(<64 x i1> %v) { ; LMULMAX1-LABEL: vreduce_and_v64i1: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma @@ -873,8 +686,7 @@ ; LMULMAX1-NEXT: vmand.mm v9, v0, v9 ; LMULMAX1-NEXT: vmnand.mm v8, v9, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: snez a0, a0 -; LMULMAX1-NEXT: addi a0, a0, -1 +; LMULMAX1-NEXT: seqz a0, a0 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_and_v64i1: @@ -883,8 +695,7 @@ ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; LMULMAX8-NEXT: vmnot.m v8, v0 ; LMULMAX8-NEXT: vcpop.m a0, v8 -; LMULMAX8-NEXT: snez a0, a0 -; LMULMAX8-NEXT: addi a0, a0, -1 +; LMULMAX8-NEXT: seqz a0, a0 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %v) ret i1 %red @@ -892,7 +703,7 @@ declare i1 @llvm.vector.reduce.umax.v64i1(<64 x i1>) -define signext i1 @vreduce_umax_v64i1(<64 x i1> %v) { +define zeroext i1 @vreduce_umax_v64i1(<64 x i1> %v) { ; LMULMAX1-LABEL: vreduce_umax_v64i1: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma @@ -900,8 +711,7 @@ ; LMULMAX1-NEXT: vmor.mm v9, v0, v9 ; LMULMAX1-NEXT: vmor.mm v8, v9, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: seqz a0, a0 -; LMULMAX1-NEXT: addi a0, a0, -1 +; LMULMAX1-NEXT: snez a0, a0 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_umax_v64i1: @@ -909,8 +719,7 @@ ; LMULMAX8-NEXT: li a0, 64 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: seqz a0, a0 -; LMULMAX8-NEXT: addi a0, a0, -1 +; LMULMAX8-NEXT: snez a0, a0 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> %v) ret i1 %red @@ -918,7 +727,7 @@ declare i1 @llvm.vector.reduce.smax.v64i1(<64 x i1>) -define signext i1 @vreduce_smax_v64i1(<64 x i1> %v) { +define zeroext i1 @vreduce_smax_v64i1(<64 x i1> %v) { ; LMULMAX1-LABEL: vreduce_smax_v64i1: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma @@ -926,8 +735,7 @@ ; LMULMAX1-NEXT: vmand.mm v9, v0, v9 ; LMULMAX1-NEXT: vmnand.mm v8, v9, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: snez a0, a0 -; LMULMAX1-NEXT: addi a0, a0, -1 +; LMULMAX1-NEXT: seqz a0, a0 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_smax_v64i1: @@ -936,8 +744,7 @@ ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; LMULMAX8-NEXT: vmnot.m v8, v0 ; LMULMAX8-NEXT: vcpop.m a0, v8 -; LMULMAX8-NEXT: snez a0, a0 -; LMULMAX8-NEXT: addi a0, a0, -1 +; LMULMAX8-NEXT: seqz a0, a0 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> %v) ret i1 %red @@ -945,7 +752,7 @@ declare i1 @llvm.vector.reduce.umin.v64i1(<64 x i1>) -define signext i1 @vreduce_umin_v64i1(<64 x i1> %v) { +define zeroext i1 @vreduce_umin_v64i1(<64 x i1> %v) { ; LMULMAX1-LABEL: vreduce_umin_v64i1: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma @@ -953,8 +760,7 @@ ; LMULMAX1-NEXT: vmand.mm v9, v0, v9 ; LMULMAX1-NEXT: vmnand.mm v8, v9, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: snez a0, a0 -; LMULMAX1-NEXT: addi a0, a0, -1 +; LMULMAX1-NEXT: seqz a0, a0 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_umin_v64i1: @@ -963,8 +769,7 @@ ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; LMULMAX8-NEXT: vmnot.m v8, v0 ; LMULMAX8-NEXT: vcpop.m a0, v8 -; LMULMAX8-NEXT: snez a0, a0 -; LMULMAX8-NEXT: addi a0, a0, -1 +; LMULMAX8-NEXT: seqz a0, a0 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> %v) ret i1 %red @@ -972,7 +777,7 @@ declare i1 @llvm.vector.reduce.smin.v64i1(<64 x i1>) -define signext i1 @vreduce_smin_v64i1(<64 x i1> %v) { +define zeroext i1 @vreduce_smin_v64i1(<64 x i1> %v) { ; LMULMAX1-LABEL: vreduce_smin_v64i1: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma @@ -980,8 +785,7 @@ ; LMULMAX1-NEXT: vmor.mm v9, v0, v9 ; LMULMAX1-NEXT: vmor.mm v8, v9, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: seqz a0, a0 -; LMULMAX1-NEXT: addi a0, a0, -1 +; LMULMAX1-NEXT: snez a0, a0 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_smin_v64i1: @@ -989,8 +793,7 @@ ; LMULMAX8-NEXT: li a0, 64 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: seqz a0, a0 -; LMULMAX8-NEXT: addi a0, a0, -1 +; LMULMAX8-NEXT: snez a0, a0 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> %v) ret i1 %red @@ -998,13 +801,12 @@ declare i1 @llvm.vector.reduce.add.v1i1(<1 x i1>) -define signext i1 @vreduce_add_v1i1(<1 x i1> %v) { +define zeroext i1 @vreduce_add_v1i1(<1 x i1> %v) { ; CHECK-LABEL: vreduce_add_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> %v) ret i1 %red @@ -1012,240 +814,103 @@ declare i1 @llvm.vector.reduce.add.v2i1(<2 x i1>) -define signext i1 @vreduce_add_v2i1(<2 x i1> %v) { -; LMULMAX1-RV32-LABEL: vreduce_add_v2i1: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX1-RV32-NEXT: slli a0, a0, 31 -; LMULMAX1-RV32-NEXT: srai a0, a0, 31 -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: vreduce_add_v2i1: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX1-RV64-NEXT: slli a0, a0, 63 -; LMULMAX1-RV64-NEXT: srai a0, a0, 63 -; LMULMAX1-RV64-NEXT: ret -; -; LMULMAX8-RV32-LABEL: vreduce_add_v2i1: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX8-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV32-NEXT: slli a0, a0, 31 -; LMULMAX8-RV32-NEXT: srai a0, a0, 31 -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: vreduce_add_v2i1: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX8-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV64-NEXT: slli a0, a0, 63 -; LMULMAX8-RV64-NEXT: srai a0, a0, 63 -; LMULMAX8-RV64-NEXT: ret +define zeroext i1 @vreduce_add_v2i1(<2 x i1> %v) { +; CHECK-LABEL: vreduce_add_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> %v) ret i1 %red } declare i1 @llvm.vector.reduce.add.v4i1(<4 x i1>) -define signext i1 @vreduce_add_v4i1(<4 x i1> %v) { -; LMULMAX1-RV32-LABEL: vreduce_add_v4i1: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX1-RV32-NEXT: slli a0, a0, 31 -; LMULMAX1-RV32-NEXT: srai a0, a0, 31 -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: vreduce_add_v4i1: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX1-RV64-NEXT: slli a0, a0, 63 -; LMULMAX1-RV64-NEXT: srai a0, a0, 63 -; LMULMAX1-RV64-NEXT: ret -; -; LMULMAX8-RV32-LABEL: vreduce_add_v4i1: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX8-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV32-NEXT: slli a0, a0, 31 -; LMULMAX8-RV32-NEXT: srai a0, a0, 31 -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: vreduce_add_v4i1: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX8-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV64-NEXT: slli a0, a0, 63 -; LMULMAX8-RV64-NEXT: srai a0, a0, 63 -; LMULMAX8-RV64-NEXT: ret +define zeroext i1 @vreduce_add_v4i1(<4 x i1> %v) { +; CHECK-LABEL: vreduce_add_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> %v) ret i1 %red } declare i1 @llvm.vector.reduce.add.v8i1(<8 x i1>) -define signext i1 @vreduce_add_v8i1(<8 x i1> %v) { -; LMULMAX1-RV32-LABEL: vreduce_add_v8i1: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX1-RV32-NEXT: slli a0, a0, 31 -; LMULMAX1-RV32-NEXT: srai a0, a0, 31 -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: vreduce_add_v8i1: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX1-RV64-NEXT: slli a0, a0, 63 -; LMULMAX1-RV64-NEXT: srai a0, a0, 63 -; LMULMAX1-RV64-NEXT: ret -; -; LMULMAX8-RV32-LABEL: vreduce_add_v8i1: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX8-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV32-NEXT: slli a0, a0, 31 -; LMULMAX8-RV32-NEXT: srai a0, a0, 31 -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: vreduce_add_v8i1: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX8-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV64-NEXT: slli a0, a0, 63 -; LMULMAX8-RV64-NEXT: srai a0, a0, 63 -; LMULMAX8-RV64-NEXT: ret +define zeroext i1 @vreduce_add_v8i1(<8 x i1> %v) { +; CHECK-LABEL: vreduce_add_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> %v) ret i1 %red } declare i1 @llvm.vector.reduce.add.v16i1(<16 x i1>) -define signext i1 @vreduce_add_v16i1(<16 x i1> %v) { -; LMULMAX1-RV32-LABEL: vreduce_add_v16i1: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX1-RV32-NEXT: slli a0, a0, 31 -; LMULMAX1-RV32-NEXT: srai a0, a0, 31 -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: vreduce_add_v16i1: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX1-RV64-NEXT: slli a0, a0, 63 -; LMULMAX1-RV64-NEXT: srai a0, a0, 63 -; LMULMAX1-RV64-NEXT: ret -; -; LMULMAX8-RV32-LABEL: vreduce_add_v16i1: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX8-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV32-NEXT: slli a0, a0, 31 -; LMULMAX8-RV32-NEXT: srai a0, a0, 31 -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: vreduce_add_v16i1: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX8-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV64-NEXT: slli a0, a0, 63 -; LMULMAX8-RV64-NEXT: srai a0, a0, 63 -; LMULMAX8-RV64-NEXT: ret +define zeroext i1 @vreduce_add_v16i1(<16 x i1> %v) { +; CHECK-LABEL: vreduce_add_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> %v) ret i1 %red } declare i1 @llvm.vector.reduce.add.v32i1(<32 x i1>) -define signext i1 @vreduce_add_v32i1(<32 x i1> %v) { -; LMULMAX1-RV32-LABEL: vreduce_add_v32i1: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmxor.mm v8, v0, v8 -; LMULMAX1-RV32-NEXT: vcpop.m a0, v8 -; LMULMAX1-RV32-NEXT: slli a0, a0, 31 -; LMULMAX1-RV32-NEXT: srai a0, a0, 31 -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: vreduce_add_v32i1: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmxor.mm v8, v0, v8 -; LMULMAX1-RV64-NEXT: vcpop.m a0, v8 -; LMULMAX1-RV64-NEXT: slli a0, a0, 63 -; LMULMAX1-RV64-NEXT: srai a0, a0, 63 -; LMULMAX1-RV64-NEXT: ret -; -; LMULMAX8-RV32-LABEL: vreduce_add_v32i1: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: li a0, 32 -; LMULMAX8-RV32-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; LMULMAX8-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV32-NEXT: slli a0, a0, 31 -; LMULMAX8-RV32-NEXT: srai a0, a0, 31 -; LMULMAX8-RV32-NEXT: ret +define zeroext i1 @vreduce_add_v32i1(<32 x i1> %v) { +; LMULMAX1-LABEL: vreduce_add_v32i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; LMULMAX1-NEXT: vmxor.mm v8, v0, v8 +; LMULMAX1-NEXT: vcpop.m a0, v8 +; LMULMAX1-NEXT: andi a0, a0, 1 +; LMULMAX1-NEXT: ret ; -; LMULMAX8-RV64-LABEL: vreduce_add_v32i1: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: li a0, 32 -; LMULMAX8-RV64-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; LMULMAX8-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV64-NEXT: slli a0, a0, 63 -; LMULMAX8-RV64-NEXT: srai a0, a0, 63 -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: vreduce_add_v32i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: li a0, 32 +; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; LMULMAX8-NEXT: vcpop.m a0, v0 +; LMULMAX8-NEXT: andi a0, a0, 1 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> %v) ret i1 %red } declare i1 @llvm.vector.reduce.add.v64i1(<64 x i1>) -define signext i1 @vreduce_add_v64i1(<64 x i1> %v) { -; LMULMAX1-RV32-LABEL: vreduce_add_v64i1: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmxor.mm v8, v8, v10 -; LMULMAX1-RV32-NEXT: vmxor.mm v9, v0, v9 -; LMULMAX1-RV32-NEXT: vmxor.mm v8, v9, v8 -; LMULMAX1-RV32-NEXT: vcpop.m a0, v8 -; LMULMAX1-RV32-NEXT: slli a0, a0, 31 -; LMULMAX1-RV32-NEXT: srai a0, a0, 31 -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: vreduce_add_v64i1: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmxor.mm v8, v8, v10 -; LMULMAX1-RV64-NEXT: vmxor.mm v9, v0, v9 -; LMULMAX1-RV64-NEXT: vmxor.mm v8, v9, v8 -; LMULMAX1-RV64-NEXT: vcpop.m a0, v8 -; LMULMAX1-RV64-NEXT: slli a0, a0, 63 -; LMULMAX1-RV64-NEXT: srai a0, a0, 63 -; LMULMAX1-RV64-NEXT: ret -; -; LMULMAX8-RV32-LABEL: vreduce_add_v64i1: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: li a0, 64 -; LMULMAX8-RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; LMULMAX8-RV32-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV32-NEXT: slli a0, a0, 31 -; LMULMAX8-RV32-NEXT: srai a0, a0, 31 -; LMULMAX8-RV32-NEXT: ret +define zeroext i1 @vreduce_add_v64i1(<64 x i1> %v) { +; LMULMAX1-LABEL: vreduce_add_v64i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; LMULMAX1-NEXT: vmxor.mm v8, v8, v10 +; LMULMAX1-NEXT: vmxor.mm v9, v0, v9 +; LMULMAX1-NEXT: vmxor.mm v8, v9, v8 +; LMULMAX1-NEXT: vcpop.m a0, v8 +; LMULMAX1-NEXT: andi a0, a0, 1 +; LMULMAX1-NEXT: ret ; -; LMULMAX8-RV64-LABEL: vreduce_add_v64i1: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: li a0, 64 -; LMULMAX8-RV64-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; LMULMAX8-RV64-NEXT: vcpop.m a0, v0 -; LMULMAX8-RV64-NEXT: slli a0, a0, 63 -; LMULMAX8-RV64-NEXT: srai a0, a0, 63 -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: vreduce_add_v64i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: li a0, 64 +; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; LMULMAX8-NEXT: vcpop.m a0, v0 +; LMULMAX8-NEXT: andi a0, a0, 1 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> %v) ret i1 %red } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; LMULMAX1-RV32: {{.*}} +; LMULMAX1-RV64: {{.*}} +; LMULMAX8-RV32: {{.*}} +; LMULMAX8-RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll @@ -4,7 +4,7 @@ declare i1 @llvm.vp.reduce.and.nxv1i1(i1, , , i32) -define signext i1 @vpreduce_and_nxv1i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_and_nxv1i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -13,7 +13,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.and.nxv1i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -21,65 +20,39 @@ declare i1 @llvm.vp.reduce.or.nxv1i1(i1, , , i32) -define signext i1 @vpreduce_or_nxv1i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_or_nxv1i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_or_nxv1i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_or_nxv1i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_or_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.or.nxv1i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.xor.nxv1i1(i1, , , i32) -define signext i1 @vpreduce_xor_nxv1i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_xor_nxv1i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_xor_nxv1i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_xor_nxv1i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_xor_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.nxv1i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.and.nxv2i1(i1, , , i32) -define signext i1 @vpreduce_and_nxv2i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_and_nxv2i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -88,7 +61,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.and.nxv2i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -96,65 +68,39 @@ declare i1 @llvm.vp.reduce.or.nxv2i1(i1, , , i32) -define signext i1 @vpreduce_or_nxv2i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_or_nxv2i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_or_nxv2i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_or_nxv2i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_or_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.or.nxv2i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.xor.nxv2i1(i1, , , i32) -define signext i1 @vpreduce_xor_nxv2i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_xor_nxv2i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_xor_nxv2i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_xor_nxv2i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_xor_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.nxv2i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.and.nxv4i1(i1, , , i32) -define signext i1 @vpreduce_and_nxv4i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_and_nxv4i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -163,7 +109,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.and.nxv4i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -171,65 +116,39 @@ declare i1 @llvm.vp.reduce.or.nxv4i1(i1, , , i32) -define signext i1 @vpreduce_or_nxv4i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_or_nxv4i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_or_nxv4i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_or_nxv4i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_or_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.or.nxv4i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.xor.nxv4i1(i1, , , i32) -define signext i1 @vpreduce_xor_nxv4i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_xor_nxv4i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_xor_nxv4i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_xor_nxv4i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_xor_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.nxv4i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.and.nxv8i1(i1, , , i32) -define signext i1 @vpreduce_and_nxv8i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_and_nxv8i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -238,7 +157,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.and.nxv8i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -246,65 +164,39 @@ declare i1 @llvm.vp.reduce.or.nxv8i1(i1, , , i32) -define signext i1 @vpreduce_or_nxv8i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_or_nxv8i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_or_nxv8i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_or_nxv8i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_or_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.or.nxv8i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.xor.nxv8i1(i1, , , i32) -define signext i1 @vpreduce_xor_nxv8i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_xor_nxv8i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_xor_nxv8i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_xor_nxv8i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_xor_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.nxv8i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.and.nxv16i1(i1, , , i32) -define signext i1 @vpreduce_and_nxv16i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_and_nxv16i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -313,7 +205,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.and.nxv16i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -321,65 +212,39 @@ declare i1 @llvm.vp.reduce.or.nxv16i1(i1, , , i32) -define signext i1 @vpreduce_or_nxv16i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_or_nxv16i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_or_nxv16i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_or_nxv16i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_or_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.or.nxv16i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.xor.nxv16i1(i1, , , i32) -define signext i1 @vpreduce_xor_nxv16i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_xor_nxv16i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_xor_nxv16i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_xor_nxv16i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_xor_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.nxv16i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.and.nxv32i1(i1, , , i32) -define signext i1 @vpreduce_and_nxv32i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_and_nxv32i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -388,7 +253,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.and.nxv32i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -396,95 +260,55 @@ declare i1 @llvm.vp.reduce.or.nxv32i1(i1, , , i32) -define signext i1 @vpreduce_or_nxv32i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_or_nxv32i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_or_nxv32i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_or_nxv32i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_or_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.or.nxv32i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.xor.nxv32i1(i1, , , i32) -define signext i1 @vpreduce_xor_nxv32i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_xor_nxv32i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_xor_nxv32i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_xor_nxv32i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_xor_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.nxv32i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.or.nxv40i1(i1, , , i32) -define signext i1 @vpreduce_or_nxv40i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_or_nxv40i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_or_nxv40i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_or_nxv40i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_or_nxv40i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.or.nxv40i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.and.nxv64i1(i1, , , i32) -define signext i1 @vpreduce_and_nxv64i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_and_nxv64i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_nxv64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma @@ -493,7 +317,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.and.nxv64i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -501,314 +324,175 @@ declare i1 @llvm.vp.reduce.or.nxv64i1(i1, , , i32) -define signext i1 @vpreduce_or_nxv64i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_or_nxv64i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_or_nxv64i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_or_nxv64i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_or_nxv64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.or.nxv64i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.xor.nxv64i1(i1, , , i32) -define signext i1 @vpreduce_xor_nxv64i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_xor_nxv64i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_xor_nxv64i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_xor_nxv64i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_xor_nxv64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.nxv64i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.or.nxv128i1(i1, , , i32) -define signext i1 @vpreduce_or_nxv128i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_or_nxv128i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v11, v0 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: sub a3, a1, a2 -; RV32-NEXT: sltu a4, a1, a3 -; RV32-NEXT: addi a4, a4, -1 -; RV32-NEXT: and a3, a4, a3 -; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vcpop.m a3, v8, v0.t -; RV32-NEXT: snez a3, a3 -; RV32-NEXT: bltu a1, a2, .LBB22_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB22_2: -; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v9 -; RV32-NEXT: vcpop.m a1, v11, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a3, a0 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_or_nxv128i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: sub a3, a1, a2 -; RV64-NEXT: sltu a4, a1, a3 -; RV64-NEXT: addi a4, a4, -1 -; RV64-NEXT: and a3, a4, a3 -; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vcpop.m a3, v8, v0.t -; RV64-NEXT: snez a3, a3 -; RV64-NEXT: bltu a1, a2, .LBB22_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB22_2: -; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v9 -; RV64-NEXT: vcpop.m a1, v11, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a3, a0 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_or_nxv128i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_or_nxv128i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vcpop.m a3, v8, v0.t +; CHECK-NEXT: snez a3, a3 +; CHECK-NEXT: bltu a1, a2, .LBB22_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: .LBB22_2: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vcpop.m a1, v11, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a3, a0 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.or.nxv128i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.add.nxv1i1(i1, , , i32) -define signext i1 @vpreduce_add_nxv1i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_add_nxv1i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_add_nxv1i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_add_nxv1i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_add_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.add.nxv1i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.add.nxv2i1(i1, , , i32) -define signext i1 @vpreduce_add_nxv2i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_add_nxv2i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_add_nxv2i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_add_nxv2i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_add_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.add.nxv2i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.add.nxv4i1(i1, , , i32) -define signext i1 @vpreduce_add_nxv4i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_add_nxv4i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_add_nxv4i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_add_nxv4i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_add_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.add.nxv4i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.add.nxv8i1(i1, , , i32) -define signext i1 @vpreduce_add_nxv8i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_add_nxv8i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_add_nxv8i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_add_nxv8i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_add_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.add.nxv8i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.add.nxv16i1(i1, , , i32) -define signext i1 @vpreduce_add_nxv16i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_add_nxv16i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_add_nxv16i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_add_nxv16i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_add_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.add.nxv16i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.add.nxv32i1(i1, , , i32) -define signext i1 @vpreduce_add_nxv32i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_add_nxv32i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_add_nxv32i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_add_nxv32i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_add_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.add.nxv32i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.add.nxv64i1(i1, , , i32) -define signext i1 @vpreduce_add_nxv64i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_add_nxv64i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: xor a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_add_nxv64i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: xor a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_add_nxv64i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_add_nxv64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.add.nxv64i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -816,7 +500,7 @@ declare i1 @llvm.vp.reduce.smax.nxv1i1(i1, , , i32) -define signext i1 @vpreduce_smax_nxv1i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_smax_nxv1i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -825,7 +509,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smax.nxv1i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -833,7 +516,7 @@ declare i1 @llvm.vp.reduce.smax.nxv2i1(i1, , , i32) -define signext i1 @vpreduce_smax_nxv2i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_smax_nxv2i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -842,7 +525,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smax.nxv2i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -850,7 +532,7 @@ declare i1 @llvm.vp.reduce.smax.nxv4i1(i1, , , i32) -define signext i1 @vpreduce_smax_nxv4i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_smax_nxv4i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -859,7 +541,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smax.nxv4i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -867,7 +548,7 @@ declare i1 @llvm.vp.reduce.smax.nxv8i1(i1, , , i32) -define signext i1 @vpreduce_smax_nxv8i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_smax_nxv8i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -876,7 +557,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smax.nxv8i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -884,7 +564,7 @@ declare i1 @llvm.vp.reduce.smax.nxv16i1(i1, , , i32) -define signext i1 @vpreduce_smax_nxv16i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_smax_nxv16i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -893,7 +573,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smax.nxv16i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -901,7 +580,7 @@ declare i1 @llvm.vp.reduce.smax.nxv32i1(i1, , , i32) -define signext i1 @vpreduce_smax_nxv32i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_smax_nxv32i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -910,7 +589,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smax.nxv32i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -918,7 +596,7 @@ declare i1 @llvm.vp.reduce.smax.nxv64i1(i1, , , i32) -define signext i1 @vpreduce_smax_nxv64i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_smax_nxv64i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_nxv64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma @@ -927,7 +605,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smax.nxv64i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -935,427 +612,231 @@ declare i1 @llvm.vp.reduce.smin.nxv1i1(i1, , , i32) -define signext i1 @vpreduce_smin_nxv1i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_smin_nxv1i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_smin_nxv1i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_smin_nxv1i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_smin_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.nxv1i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.smin.nxv2i1(i1, , , i32) -define signext i1 @vpreduce_smin_nxv2i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_smin_nxv2i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_smin_nxv2i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_smin_nxv2i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_smin_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.nxv2i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.smin.nxv4i1(i1, , , i32) -define signext i1 @vpreduce_smin_nxv4i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_smin_nxv4i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_smin_nxv4i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_smin_nxv4i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_smin_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.nxv4i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.smin.nxv8i1(i1, , , i32) -define signext i1 @vpreduce_smin_nxv8i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_smin_nxv8i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_smin_nxv8i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_smin_nxv8i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_smin_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.nxv8i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.smin.nxv16i1(i1, , , i32) -define signext i1 @vpreduce_smin_nxv16i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_smin_nxv16i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_smin_nxv16i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_smin_nxv16i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_smin_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.nxv16i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.smin.nxv32i1(i1, , , i32) -define signext i1 @vpreduce_smin_nxv32i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_smin_nxv32i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_smin_nxv32i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_smin_nxv32i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_smin_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.nxv32i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.smin.nxv64i1(i1, , , i32) -define signext i1 @vpreduce_smin_nxv64i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_smin_nxv64i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_smin_nxv64i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_smin_nxv64i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_smin_nxv64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.nxv64i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.umax.nxv1i1(i1, , , i32) -define signext i1 @vpreduce_umax_nxv1i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_nxv1i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_nxv1i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_umax_nxv1i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_umax_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.nxv1i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.umax.nxv2i1(i1, , , i32) -define signext i1 @vpreduce_umax_nxv2i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_nxv2i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_nxv2i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_umax_nxv2i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_umax_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.nxv2i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.umax.nxv4i1(i1, , , i32) -define signext i1 @vpreduce_umax_nxv4i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_nxv4i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_nxv4i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_umax_nxv4i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_umax_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.nxv4i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.umax.nxv8i1(i1, , , i32) -define signext i1 @vpreduce_umax_nxv8i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_nxv8i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_nxv8i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_umax_nxv8i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_umax_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.nxv8i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.umax.nxv16i1(i1, , , i32) -define signext i1 @vpreduce_umax_nxv16i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_nxv16i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_nxv16i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_umax_nxv16i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_umax_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.nxv16i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.umax.nxv32i1(i1, , , i32) -define signext i1 @vpreduce_umax_nxv32i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_nxv32i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_nxv32i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_umax_nxv32i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_umax_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.nxv32i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.umax.nxv64i1(i1, , , i32) -define signext i1 @vpreduce_umax_nxv64i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_nxv64i1: -; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vcpop.m a1, v9, v0.t -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_nxv64i1: -; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v9, v0 -; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vcpop.m a1, v9, v0.t -; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vpreduce_umax_nxv64i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_umax_nxv64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a1, v9, v0.t +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.nxv64i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } declare i1 @llvm.vp.reduce.umin.nxv1i1(i1, , , i32) -define signext i1 @vpreduce_umin_nxv1i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_umin_nxv1i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -1364,7 +845,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umin.nxv1i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -1372,7 +852,7 @@ declare i1 @llvm.vp.reduce.umin.nxv2i1(i1, , , i32) -define signext i1 @vpreduce_umin_nxv2i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_umin_nxv2i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -1381,7 +861,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umin.nxv2i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -1389,7 +868,7 @@ declare i1 @llvm.vp.reduce.umin.nxv4i1(i1, , , i32) -define signext i1 @vpreduce_umin_nxv4i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_umin_nxv4i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -1398,7 +877,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umin.nxv4i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -1406,7 +884,7 @@ declare i1 @llvm.vp.reduce.umin.nxv8i1(i1, , , i32) -define signext i1 @vpreduce_umin_nxv8i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_umin_nxv8i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -1415,7 +893,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umin.nxv8i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -1423,7 +900,7 @@ declare i1 @llvm.vp.reduce.umin.nxv16i1(i1, , , i32) -define signext i1 @vpreduce_umin_nxv16i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_umin_nxv16i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -1432,7 +909,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umin.nxv16i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -1440,7 +916,7 @@ declare i1 @llvm.vp.reduce.umin.nxv32i1(i1, , , i32) -define signext i1 @vpreduce_umin_nxv32i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_umin_nxv32i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -1449,7 +925,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umin.nxv32i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -1457,7 +932,7 @@ declare i1 @llvm.vp.reduce.umin.nxv64i1(i1, , , i32) -define signext i1 @vpreduce_umin_nxv64i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_umin_nxv64i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_nxv64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma @@ -1466,7 +941,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.umin.nxv64i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -1474,7 +948,7 @@ declare i1 @llvm.vp.reduce.mul.nxv1i1(i1, , , i32) -define signext i1 @vpreduce_mul_nxv1i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_mul_nxv1i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_mul_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -1483,7 +957,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.mul.nxv1i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -1491,7 +964,7 @@ declare i1 @llvm.vp.reduce.mul.nxv2i1(i1, , , i32) -define signext i1 @vpreduce_mul_nxv2i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_mul_nxv2i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_mul_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -1500,7 +973,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.mul.nxv2i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -1508,7 +980,7 @@ declare i1 @llvm.vp.reduce.mul.nxv4i1(i1, , , i32) -define signext i1 @vpreduce_mul_nxv4i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_mul_nxv4i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_mul_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -1517,7 +989,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.mul.nxv4i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -1525,7 +996,7 @@ declare i1 @llvm.vp.reduce.mul.nxv8i1(i1, , , i32) -define signext i1 @vpreduce_mul_nxv8i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_mul_nxv8i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_mul_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -1534,7 +1005,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.mul.nxv8i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -1542,7 +1012,7 @@ declare i1 @llvm.vp.reduce.mul.nxv16i1(i1, , , i32) -define signext i1 @vpreduce_mul_nxv16i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_mul_nxv16i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_mul_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -1551,7 +1021,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.mul.nxv16i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -1559,7 +1028,7 @@ declare i1 @llvm.vp.reduce.mul.nxv32i1(i1, , , i32) -define signext i1 @vpreduce_mul_nxv32i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_mul_nxv32i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_mul_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -1568,7 +1037,6 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.mul.nxv32i1(i1 %s, %v, %m, i32 %evl) ret i1 %r @@ -1576,7 +1044,7 @@ declare i1 @llvm.vp.reduce.mul.nxv64i1(i1, , , i32) -define signext i1 @vpreduce_mul_nxv64i1(i1 signext %s, %v, %m, i32 zeroext %evl) { +define zeroext i1 @vpreduce_mul_nxv64i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_mul_nxv64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma @@ -1585,8 +1053,10 @@ ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.mul.nxv64i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll @@ -4,13 +4,12 @@ declare i1 @llvm.vector.reduce.or.nxv1i1() -define signext i1 @vreduce_or_nxv1i1( %v) { +define zeroext i1 @vreduce_or_nxv1i1( %v) { ; CHECK-LABEL: vreduce_or_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.nxv1i1( %v) ret i1 %red @@ -18,36 +17,26 @@ declare i1 @llvm.vector.reduce.xor.nxv1i1() -define signext i1 @vreduce_xor_nxv1i1( %v) { -; RV32-LABEL: vreduce_xor_nxv1i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; RV32-NEXT: vcpop.m a0, v0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_xor_nxv1i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; RV64-NEXT: vcpop.m a0, v0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vreduce_xor_nxv1i1( %v) { +; CHECK-LABEL: vreduce_xor_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.nxv1i1( %v) ret i1 %red } declare i1 @llvm.vector.reduce.and.nxv1i1() -define signext i1 @vreduce_and_nxv1i1( %v) { +define zeroext i1 @vreduce_and_nxv1i1( %v) { ; CHECK-LABEL: vreduce_and_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.nxv1i1( %v) ret i1 %red @@ -55,13 +44,12 @@ declare i1 @llvm.vector.reduce.umax.nxv1i1() -define signext i1 @vreduce_umax_nxv1i1( %v) { +define zeroext i1 @vreduce_umax_nxv1i1( %v) { ; CHECK-LABEL: vreduce_umax_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.nxv1i1( %v) ret i1 %red @@ -69,14 +57,13 @@ declare i1 @llvm.vector.reduce.smax.nxv1i1() -define signext i1 @vreduce_smax_nxv1i1( %v) { +define zeroext i1 @vreduce_smax_nxv1i1( %v) { ; CHECK-LABEL: vreduce_smax_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.nxv1i1( %v) ret i1 %red @@ -84,14 +71,13 @@ declare i1 @llvm.vector.reduce.umin.nxv1i1() -define signext i1 @vreduce_umin_nxv1i1( %v) { +define zeroext i1 @vreduce_umin_nxv1i1( %v) { ; CHECK-LABEL: vreduce_umin_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.nxv1i1( %v) ret i1 %red @@ -99,13 +85,12 @@ declare i1 @llvm.vector.reduce.smin.nxv1i1() -define signext i1 @vreduce_smin_nxv1i1( %v) { +define zeroext i1 @vreduce_smin_nxv1i1( %v) { ; CHECK-LABEL: vreduce_smin_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.nxv1i1( %v) ret i1 %red @@ -113,13 +98,12 @@ declare i1 @llvm.vector.reduce.or.nxv2i1() -define signext i1 @vreduce_or_nxv2i1( %v) { +define zeroext i1 @vreduce_or_nxv2i1( %v) { ; CHECK-LABEL: vreduce_or_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.nxv2i1( %v) ret i1 %red @@ -127,36 +111,26 @@ declare i1 @llvm.vector.reduce.xor.nxv2i1() -define signext i1 @vreduce_xor_nxv2i1( %v) { -; RV32-LABEL: vreduce_xor_nxv2i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; RV32-NEXT: vcpop.m a0, v0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_xor_nxv2i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; RV64-NEXT: vcpop.m a0, v0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vreduce_xor_nxv2i1( %v) { +; CHECK-LABEL: vreduce_xor_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.nxv2i1( %v) ret i1 %red } declare i1 @llvm.vector.reduce.and.nxv2i1() -define signext i1 @vreduce_and_nxv2i1( %v) { +define zeroext i1 @vreduce_and_nxv2i1( %v) { ; CHECK-LABEL: vreduce_and_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.nxv2i1( %v) ret i1 %red @@ -164,13 +138,12 @@ declare i1 @llvm.vector.reduce.umax.nxv2i1() -define signext i1 @vreduce_umax_nxv2i1( %v) { +define zeroext i1 @vreduce_umax_nxv2i1( %v) { ; CHECK-LABEL: vreduce_umax_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.nxv2i1( %v) ret i1 %red @@ -178,14 +151,13 @@ declare i1 @llvm.vector.reduce.smax.nxv2i1() -define signext i1 @vreduce_smax_nxv2i1( %v) { +define zeroext i1 @vreduce_smax_nxv2i1( %v) { ; CHECK-LABEL: vreduce_smax_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.nxv2i1( %v) ret i1 %red @@ -193,14 +165,13 @@ declare i1 @llvm.vector.reduce.umin.nxv2i1() -define signext i1 @vreduce_umin_nxv2i1( %v) { +define zeroext i1 @vreduce_umin_nxv2i1( %v) { ; CHECK-LABEL: vreduce_umin_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.nxv2i1( %v) ret i1 %red @@ -208,13 +179,12 @@ declare i1 @llvm.vector.reduce.smin.nxv2i1() -define signext i1 @vreduce_smin_nxv2i1( %v) { +define zeroext i1 @vreduce_smin_nxv2i1( %v) { ; CHECK-LABEL: vreduce_smin_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.nxv2i1( %v) ret i1 %red @@ -222,13 +192,12 @@ declare i1 @llvm.vector.reduce.or.nxv4i1() -define signext i1 @vreduce_or_nxv4i1( %v) { +define zeroext i1 @vreduce_or_nxv4i1( %v) { ; CHECK-LABEL: vreduce_or_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.nxv4i1( %v) ret i1 %red @@ -236,36 +205,26 @@ declare i1 @llvm.vector.reduce.xor.nxv4i1() -define signext i1 @vreduce_xor_nxv4i1( %v) { -; RV32-LABEL: vreduce_xor_nxv4i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; RV32-NEXT: vcpop.m a0, v0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_xor_nxv4i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; RV64-NEXT: vcpop.m a0, v0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vreduce_xor_nxv4i1( %v) { +; CHECK-LABEL: vreduce_xor_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.nxv4i1( %v) ret i1 %red } declare i1 @llvm.vector.reduce.and.nxv4i1() -define signext i1 @vreduce_and_nxv4i1( %v) { +define zeroext i1 @vreduce_and_nxv4i1( %v) { ; CHECK-LABEL: vreduce_and_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.nxv4i1( %v) ret i1 %red @@ -273,13 +232,12 @@ declare i1 @llvm.vector.reduce.umax.nxv4i1() -define signext i1 @vreduce_umax_nxv4i1( %v) { +define zeroext i1 @vreduce_umax_nxv4i1( %v) { ; CHECK-LABEL: vreduce_umax_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.nxv4i1( %v) ret i1 %red @@ -287,14 +245,13 @@ declare i1 @llvm.vector.reduce.smax.nxv4i1() -define signext i1 @vreduce_smax_nxv4i1( %v) { +define zeroext i1 @vreduce_smax_nxv4i1( %v) { ; CHECK-LABEL: vreduce_smax_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.nxv4i1( %v) ret i1 %red @@ -302,14 +259,13 @@ declare i1 @llvm.vector.reduce.umin.nxv4i1() -define signext i1 @vreduce_umin_nxv4i1( %v) { +define zeroext i1 @vreduce_umin_nxv4i1( %v) { ; CHECK-LABEL: vreduce_umin_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.nxv4i1( %v) ret i1 %red @@ -317,13 +273,12 @@ declare i1 @llvm.vector.reduce.smin.nxv4i1() -define signext i1 @vreduce_smin_nxv4i1( %v) { +define zeroext i1 @vreduce_smin_nxv4i1( %v) { ; CHECK-LABEL: vreduce_smin_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.nxv4i1( %v) ret i1 %red @@ -331,13 +286,12 @@ declare i1 @llvm.vector.reduce.or.nxv8i1() -define signext i1 @vreduce_or_nxv8i1( %v) { +define zeroext i1 @vreduce_or_nxv8i1( %v) { ; CHECK-LABEL: vreduce_or_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.nxv8i1( %v) ret i1 %red @@ -345,36 +299,26 @@ declare i1 @llvm.vector.reduce.xor.nxv8i1() -define signext i1 @vreduce_xor_nxv8i1( %v) { -; RV32-LABEL: vreduce_xor_nxv8i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; RV32-NEXT: vcpop.m a0, v0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_xor_nxv8i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; RV64-NEXT: vcpop.m a0, v0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vreduce_xor_nxv8i1( %v) { +; CHECK-LABEL: vreduce_xor_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.nxv8i1( %v) ret i1 %red } declare i1 @llvm.vector.reduce.and.nxv8i1() -define signext i1 @vreduce_and_nxv8i1( %v) { +define zeroext i1 @vreduce_and_nxv8i1( %v) { ; CHECK-LABEL: vreduce_and_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.nxv8i1( %v) ret i1 %red @@ -382,13 +326,12 @@ declare i1 @llvm.vector.reduce.umax.nxv8i1() -define signext i1 @vreduce_umax_nxv8i1( %v) { +define zeroext i1 @vreduce_umax_nxv8i1( %v) { ; CHECK-LABEL: vreduce_umax_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.nxv8i1( %v) ret i1 %red @@ -396,14 +339,13 @@ declare i1 @llvm.vector.reduce.smax.nxv8i1() -define signext i1 @vreduce_smax_nxv8i1( %v) { +define zeroext i1 @vreduce_smax_nxv8i1( %v) { ; CHECK-LABEL: vreduce_smax_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.nxv8i1( %v) ret i1 %red @@ -411,14 +353,13 @@ declare i1 @llvm.vector.reduce.umin.nxv8i1() -define signext i1 @vreduce_umin_nxv8i1( %v) { +define zeroext i1 @vreduce_umin_nxv8i1( %v) { ; CHECK-LABEL: vreduce_umin_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.nxv8i1( %v) ret i1 %red @@ -426,13 +367,12 @@ declare i1 @llvm.vector.reduce.smin.nxv8i1() -define signext i1 @vreduce_smin_nxv8i1( %v) { +define zeroext i1 @vreduce_smin_nxv8i1( %v) { ; CHECK-LABEL: vreduce_smin_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.nxv8i1( %v) ret i1 %red @@ -440,13 +380,12 @@ declare i1 @llvm.vector.reduce.or.nxv16i1() -define signext i1 @vreduce_or_nxv16i1( %v) { +define zeroext i1 @vreduce_or_nxv16i1( %v) { ; CHECK-LABEL: vreduce_or_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.nxv16i1( %v) ret i1 %red @@ -454,36 +393,26 @@ declare i1 @llvm.vector.reduce.xor.nxv16i1() -define signext i1 @vreduce_xor_nxv16i1( %v) { -; RV32-LABEL: vreduce_xor_nxv16i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; RV32-NEXT: vcpop.m a0, v0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_xor_nxv16i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; RV64-NEXT: vcpop.m a0, v0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vreduce_xor_nxv16i1( %v) { +; CHECK-LABEL: vreduce_xor_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.nxv16i1( %v) ret i1 %red } declare i1 @llvm.vector.reduce.and.nxv16i1() -define signext i1 @vreduce_and_nxv16i1( %v) { +define zeroext i1 @vreduce_and_nxv16i1( %v) { ; CHECK-LABEL: vreduce_and_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.nxv16i1( %v) ret i1 %red @@ -491,13 +420,12 @@ declare i1 @llvm.vector.reduce.umax.nxv16i1() -define signext i1 @vreduce_umax_nxv16i1( %v) { +define zeroext i1 @vreduce_umax_nxv16i1( %v) { ; CHECK-LABEL: vreduce_umax_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.nxv16i1( %v) ret i1 %red @@ -505,14 +433,13 @@ declare i1 @llvm.vector.reduce.smax.nxv16i1() -define signext i1 @vreduce_smax_nxv16i1( %v) { +define zeroext i1 @vreduce_smax_nxv16i1( %v) { ; CHECK-LABEL: vreduce_smax_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.nxv16i1( %v) ret i1 %red @@ -520,14 +447,13 @@ declare i1 @llvm.vector.reduce.umin.nxv16i1() -define signext i1 @vreduce_umin_nxv16i1( %v) { +define zeroext i1 @vreduce_umin_nxv16i1( %v) { ; CHECK-LABEL: vreduce_umin_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.nxv16i1( %v) ret i1 %red @@ -535,13 +461,12 @@ declare i1 @llvm.vector.reduce.smin.nxv16i1() -define signext i1 @vreduce_smin_nxv16i1( %v) { +define zeroext i1 @vreduce_smin_nxv16i1( %v) { ; CHECK-LABEL: vreduce_smin_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.nxv16i1( %v) ret i1 %red @@ -549,13 +474,12 @@ declare i1 @llvm.vector.reduce.or.nxv32i1() -define signext i1 @vreduce_or_nxv32i1( %v) { +define zeroext i1 @vreduce_or_nxv32i1( %v) { ; CHECK-LABEL: vreduce_or_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.nxv32i1( %v) ret i1 %red @@ -563,36 +487,26 @@ declare i1 @llvm.vector.reduce.xor.nxv32i1() -define signext i1 @vreduce_xor_nxv32i1( %v) { -; RV32-LABEL: vreduce_xor_nxv32i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV32-NEXT: vcpop.m a0, v0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_xor_nxv32i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV64-NEXT: vcpop.m a0, v0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vreduce_xor_nxv32i1( %v) { +; CHECK-LABEL: vreduce_xor_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.nxv32i1( %v) ret i1 %red } declare i1 @llvm.vector.reduce.and.nxv32i1() -define signext i1 @vreduce_and_nxv32i1( %v) { +define zeroext i1 @vreduce_and_nxv32i1( %v) { ; CHECK-LABEL: vreduce_and_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.nxv32i1( %v) ret i1 %red @@ -600,13 +514,12 @@ declare i1 @llvm.vector.reduce.umax.nxv32i1() -define signext i1 @vreduce_umax_nxv32i1( %v) { +define zeroext i1 @vreduce_umax_nxv32i1( %v) { ; CHECK-LABEL: vreduce_umax_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.nxv32i1( %v) ret i1 %red @@ -614,14 +527,13 @@ declare i1 @llvm.vector.reduce.smax.nxv32i1() -define signext i1 @vreduce_smax_nxv32i1( %v) { +define zeroext i1 @vreduce_smax_nxv32i1( %v) { ; CHECK-LABEL: vreduce_smax_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.nxv32i1( %v) ret i1 %red @@ -629,14 +541,13 @@ declare i1 @llvm.vector.reduce.umin.nxv32i1() -define signext i1 @vreduce_umin_nxv32i1( %v) { +define zeroext i1 @vreduce_umin_nxv32i1( %v) { ; CHECK-LABEL: vreduce_umin_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.nxv32i1( %v) ret i1 %red @@ -644,13 +555,12 @@ declare i1 @llvm.vector.reduce.smin.nxv32i1() -define signext i1 @vreduce_smin_nxv32i1( %v) { +define zeroext i1 @vreduce_smin_nxv32i1( %v) { ; CHECK-LABEL: vreduce_smin_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.nxv32i1( %v) ret i1 %red @@ -658,13 +568,12 @@ declare i1 @llvm.vector.reduce.or.nxv64i1() -define signext i1 @vreduce_or_nxv64i1( %v) { +define zeroext i1 @vreduce_or_nxv64i1( %v) { ; CHECK-LABEL: vreduce_or_nxv64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.nxv64i1( %v) ret i1 %red @@ -672,36 +581,26 @@ declare i1 @llvm.vector.reduce.xor.nxv64i1() -define signext i1 @vreduce_xor_nxv64i1( %v) { -; RV32-LABEL: vreduce_xor_nxv64i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV32-NEXT: vcpop.m a0, v0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_xor_nxv64i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV64-NEXT: vcpop.m a0, v0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vreduce_xor_nxv64i1( %v) { +; CHECK-LABEL: vreduce_xor_nxv64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.nxv64i1( %v) ret i1 %red } declare i1 @llvm.vector.reduce.and.nxv64i1() -define signext i1 @vreduce_and_nxv64i1( %v) { +define zeroext i1 @vreduce_and_nxv64i1( %v) { ; CHECK-LABEL: vreduce_and_nxv64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.nxv64i1( %v) ret i1 %red @@ -709,13 +608,12 @@ declare i1 @llvm.vector.reduce.umax.nxv64i1() -define signext i1 @vreduce_umax_nxv64i1( %v) { +define zeroext i1 @vreduce_umax_nxv64i1( %v) { ; CHECK-LABEL: vreduce_umax_nxv64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.nxv64i1( %v) ret i1 %red @@ -723,14 +621,13 @@ declare i1 @llvm.vector.reduce.smax.nxv64i1() -define signext i1 @vreduce_smax_nxv64i1( %v) { +define zeroext i1 @vreduce_smax_nxv64i1( %v) { ; CHECK-LABEL: vreduce_smax_nxv64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.nxv64i1( %v) ret i1 %red @@ -738,14 +635,13 @@ declare i1 @llvm.vector.reduce.umin.nxv64i1() -define signext i1 @vreduce_umin_nxv64i1( %v) { +define zeroext i1 @vreduce_umin_nxv64i1( %v) { ; CHECK-LABEL: vreduce_umin_nxv64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.nxv64i1( %v) ret i1 %red @@ -753,13 +649,12 @@ declare i1 @llvm.vector.reduce.smin.nxv64i1() -define signext i1 @vreduce_smin_nxv64i1( %v) { +define zeroext i1 @vreduce_smin_nxv64i1( %v) { ; CHECK-LABEL: vreduce_smin_nxv64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.nxv64i1( %v) ret i1 %red @@ -767,154 +662,94 @@ declare i1 @llvm.vector.reduce.add.nxv1i1() -define signext i1 @vreduce_add_nxv1i1( %v) { -; RV32-LABEL: vreduce_add_nxv1i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; RV32-NEXT: vcpop.m a0, v0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_add_nxv1i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; RV64-NEXT: vcpop.m a0, v0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vreduce_add_nxv1i1( %v) { +; CHECK-LABEL: vreduce_add_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.nxv1i1( %v) ret i1 %red } declare i1 @llvm.vector.reduce.add.nxv2i1() -define signext i1 @vreduce_add_nxv2i1( %v) { -; RV32-LABEL: vreduce_add_nxv2i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; RV32-NEXT: vcpop.m a0, v0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_add_nxv2i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; RV64-NEXT: vcpop.m a0, v0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vreduce_add_nxv2i1( %v) { +; CHECK-LABEL: vreduce_add_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.nxv2i1( %v) ret i1 %red } declare i1 @llvm.vector.reduce.add.nxv4i1() -define signext i1 @vreduce_add_nxv4i1( %v) { -; RV32-LABEL: vreduce_add_nxv4i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; RV32-NEXT: vcpop.m a0, v0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_add_nxv4i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; RV64-NEXT: vcpop.m a0, v0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vreduce_add_nxv4i1( %v) { +; CHECK-LABEL: vreduce_add_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.nxv4i1( %v) ret i1 %red } declare i1 @llvm.vector.reduce.add.nxv8i1() -define signext i1 @vreduce_add_nxv8i1( %v) { -; RV32-LABEL: vreduce_add_nxv8i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; RV32-NEXT: vcpop.m a0, v0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_add_nxv8i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; RV64-NEXT: vcpop.m a0, v0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vreduce_add_nxv8i1( %v) { +; CHECK-LABEL: vreduce_add_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.nxv8i1( %v) ret i1 %red } declare i1 @llvm.vector.reduce.add.nxv16i1() -define signext i1 @vreduce_add_nxv16i1( %v) { -; RV32-LABEL: vreduce_add_nxv16i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; RV32-NEXT: vcpop.m a0, v0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_add_nxv16i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; RV64-NEXT: vcpop.m a0, v0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vreduce_add_nxv16i1( %v) { +; CHECK-LABEL: vreduce_add_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.nxv16i1( %v) ret i1 %red } declare i1 @llvm.vector.reduce.add.nxv32i1() -define signext i1 @vreduce_add_nxv32i1( %v) { -; RV32-LABEL: vreduce_add_nxv32i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV32-NEXT: vcpop.m a0, v0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_add_nxv32i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV64-NEXT: vcpop.m a0, v0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vreduce_add_nxv32i1( %v) { +; CHECK-LABEL: vreduce_add_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.nxv32i1( %v) ret i1 %red } declare i1 @llvm.vector.reduce.add.nxv64i1() -define signext i1 @vreduce_add_nxv64i1( %v) { -; RV32-LABEL: vreduce_add_nxv64i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV32-NEXT: vcpop.m a0, v0 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_add_nxv64i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV64-NEXT: vcpop.m a0, v0 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: ret +define zeroext i1 @vreduce_add_nxv64i1( %v) { +; CHECK-LABEL: vreduce_add_nxv64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.nxv64i1( %v) ret i1 %red } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}}