diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1201,6 +1201,7 @@
   return N->hasOneUse();
 }]>;
 
+def and_oneuse : binop_oneuse<and>;
 def add_oneuse : binop_oneuse<add>;
 def mul_oneuse : binop_oneuse<mul>;
 
@@ -1238,6 +1239,12 @@
 def : PatGprUimmLog2XLen<srl, SRLI>;
 def : PatGprUimmLog2XLen<sra, SRAI>;
 
+// Negate of the low bit can be done via two (compressible) shifts. The negate
+// is never compressible, since rs1 and rd can't be the same register.
+def : Pat<(XLenVT (sub 0, (and_oneuse GPR:$rs, 1))),
+          (SRAI (SLLI $rs, (ImmSubFromXLen (XLenVT 1))),
+                (ImmSubFromXLen (XLenVT 1)))>;
+
 // AND with leading/trailing ones mask exceeding simm32/simm12.
 def : Pat<(i64 (and GPR:$rs, LeadingOnesMask:$mask)),
           (SLLI (SRLI $rs, LeadingOnesMask:$mask), LeadingOnesMask:$mask)>;
diff --git a/llvm/test/CodeGen/RISCV/pr58511.ll b/llvm/test/CodeGen/RISCV/pr58511.ll
--- a/llvm/test/CodeGen/RISCV/pr58511.ll
+++ b/llvm/test/CodeGen/RISCV/pr58511.ll
@@ -7,8 +7,8 @@
 ; CHECK-NEXT:    slliw a3, a1, 11
 ; CHECK-NEXT:    slliw a1, a1, 12
 ; CHECK-NEXT:    subw a1, a1, a3
-; CHECK-NEXT:    andi a0, a0, 1
-; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    slli a0, a0, 63
+; CHECK-NEXT:    srai a0, a0, 63
 ; CHECK-NEXT:    li a3, 1
 ; CHECK-NEXT:    slli a3, a3, 11
 ; CHECK-NEXT:    or a0, a0, a3
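Before the test churn, it is worth spelling out the identity the new pattern relies on: -(x & 1) is the same as shifting bit 0 up into the sign position and arithmetic-shifting it back down, which sign-extends the low bit across the whole register. Below is a minimal standalone check of that equivalence, assuming XLEN=64, two's-complement wrapping on the cast, and an arithmetic right shift for signed types (which is what srai guarantees, though C leaves it implementation-defined); the helper names are illustrative and not part of the patch.

#include <assert.h>
#include <stdint.h>

/* Old lowering: isolate the low bit, then negate it (andi + neg). */
static int64_t negate_low_bit_andi_neg(int64_t x) {
  return -(x & 1);
}

/* New lowering: shift bit 0 up to bit 63, then arithmetic-shift it back
 * down so it fills the register (slli + srai). On RV32 the shift amount
 * would be 31 instead of 63. Assumes an arithmetic right shift of signed
 * values, matching RISC-V srai. */
static int64_t negate_low_bit_slli_srai(int64_t x) {
  return (int64_t)((uint64_t)x << 63) >> 63;
}

int main(void) {
  for (int64_t x = -4; x <= 4; x++)
    assert(negate_low_bit_andi_neg(x) == negate_low_bit_slli_srai(x));
  return 0;
}

Both lowerings are two instructions, but as the comment in the pattern notes, slli and srai have compressed forms that operate in place (c.slli, c.srai), while neg, an alias for sub rd, x0, rs, can never be compressed here because its source and destination registers differ. The shift amount is XLEN-1, which is also why the regenerated tests below need separate RV32 (shift by 31) and RV64 (shift by 63) check prefixes.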
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s
+; RUN:   | FileCheck --check-prefixes=CHECK,RV32 %s
 ; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s
+; RUN:   | FileCheck --check-prefixes=CHECK,RV64 %s
 
 declare i1 @llvm.vp.reduce.and.v1i1(i1, <1 x i1>, <1 x i1>, i32)
 
@@ -24,17 +24,29 @@
 declare i1 @llvm.vp.reduce.or.v1i1(i1, <1 x i1>, <1 x i1>, i32)
 
 define signext i1 @vpreduce_or_v1i1(i1 signext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_or_v1i1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a1, v9, v0.t
-; CHECK-NEXT:    snez a1, a1
-; CHECK-NEXT:    or a0, a1, a0
-; CHECK-NEXT:    andi a0, a0, 1
-; CHECK-NEXT:    neg a0, a0
-; CHECK-NEXT:    ret
+; RV32-LABEL: vpreduce_or_v1i1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v9, v0
+; RV32-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vcpop.m a1, v9, v0.t
+; RV32-NEXT:    snez a1, a1
+; RV32-NEXT:    or a0, a1, a0
+; RV32-NEXT:    slli a0, a0, 31
+; RV32-NEXT:    srai a0, a0, 31
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpreduce_or_v1i1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v9, v0
+; RV64-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v8
+; RV64-NEXT:    vcpop.m a1, v9, v0.t
+; RV64-NEXT:    snez a1, a1
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    slli a0, a0, 63
+; RV64-NEXT:    srai a0, a0, 63
+; RV64-NEXT:    ret
   %r = call i1 @llvm.vp.reduce.or.v1i1(i1 %s, <1 x i1> %v, <1 x i1> %m, i32 %evl)
   ret i1 %r
 }
@@ -42,16 +54,27 @@
 declare i1 @llvm.vp.reduce.xor.v1i1(i1, <1 x i1>, <1 x i1>, i32)
 
 define signext i1 @vpreduce_xor_v1i1(i1 signext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_xor_v1i1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a1, v9, v0.t
-; CHECK-NEXT:    xor a0, a1, a0
-; CHECK-NEXT:    andi a0, a0, 1
-; CHECK-NEXT:    neg a0, a0
-; CHECK-NEXT:    ret
+; RV32-LABEL: vpreduce_xor_v1i1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v9, v0
+; RV32-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vcpop.m a1, v9, v0.t
+; RV32-NEXT:    xor a0, a1, a0
+; RV32-NEXT:    slli a0, a0, 31
+; RV32-NEXT:    srai a0, a0, 31
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpreduce_xor_v1i1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v9, v0
+; RV64-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v8
+; RV64-NEXT:    vcpop.m a1, v9, v0.t
+; RV64-NEXT:    xor a0, a1, a0
+; RV64-NEXT:    slli a0, a0, 63
+; RV64-NEXT:    srai a0, a0, 63
+; RV64-NEXT:    ret
   %r = call i1 @llvm.vp.reduce.xor.v1i1(i1 %s, <1 x i1> %v, <1 x i1> %m, i32 %evl)
   ret i1 %r
 }
@@ -76,17 +99,29 @@
 declare i1 @llvm.vp.reduce.or.v2i1(i1, <2 x i1>, <2 x i1>, i32)
 
 define signext i1 @vpreduce_or_v2i1(i1 signext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_or_v2i1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a1, v9, v0.t
-; CHECK-NEXT:    snez a1, a1
-; CHECK-NEXT:    or a0, a1, a0
-; CHECK-NEXT:    andi a0, a0, 1
-; CHECK-NEXT:    neg a0, a0
-; CHECK-NEXT:    ret
+; RV32-LABEL: vpreduce_or_v2i1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v9, v0
+; RV32-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vcpop.m a1, v9, v0.t
+; RV32-NEXT:    snez a1, a1
+; RV32-NEXT:    or a0, a1, a0
+; RV32-NEXT:    slli a0, a0, 31
+; RV32-NEXT:    srai a0, a0, 31
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpreduce_or_v2i1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v9, v0
+; RV64-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v8
+; RV64-NEXT:    vcpop.m a1, v9, v0.t
+; RV64-NEXT:    snez a1, a1
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    slli a0, a0, 63
+; RV64-NEXT:    srai a0, a0, 63
+; RV64-NEXT:    ret
   %r = call i1 @llvm.vp.reduce.or.v2i1(i1 %s, <2 x i1> %v, <2 x i1> %m, i32 %evl)
   ret i1 %r
 }
@@ -94,16 +129,27 @@
 declare i1 @llvm.vp.reduce.xor.v2i1(i1, <2 x i1>, <2 x i1>, i32)
 
 define signext i1 @vpreduce_xor_v2i1(i1 signext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_xor_v2i1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a1, v9, v0.t
-; CHECK-NEXT:    xor a0, a1, a0
-; CHECK-NEXT:    andi a0, a0, 1
-; CHECK-NEXT:    neg a0, a0
-; CHECK-NEXT:    ret
+; RV32-LABEL: vpreduce_xor_v2i1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v9, v0
+; RV32-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vcpop.m a1, v9, v0.t
+; RV32-NEXT:    xor a0, a1, a0
+; RV32-NEXT:    slli a0, a0, 31
+; RV32-NEXT:    srai a0, a0, 31
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpreduce_xor_v2i1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v9, v0
+; RV64-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v8
+; RV64-NEXT:    vcpop.m a1, v9, v0.t
+; RV64-NEXT:    xor a0, a1, a0
+; RV64-NEXT:    slli a0, a0, 63
+; RV64-NEXT:    srai a0, a0, 63
+; RV64-NEXT:    ret
   %r = call i1 @llvm.vp.reduce.xor.v2i1(i1 %s, <2 x i1> %v, <2 x i1> %m, i32 %evl)
   ret i1 %r
 }
@@ -128,17 +174,29 @@
 declare i1 @llvm.vp.reduce.or.v4i1(i1, <4 x i1>, <4 x i1>, i32)
 
 define signext i1
@vpreduce_or_v4i1(i1 signext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_or_v4i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_or_v4i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_or_v4i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.or.v4i1(i1 %s, <4 x i1> %v, <4 x i1> %m, i32 %evl) ret i1 %r } @@ -146,16 +204,27 @@ declare i1 @llvm.vp.reduce.xor.v4i1(i1, <4 x i1>, <4 x i1>, i32) define signext i1 @vpreduce_xor_v4i1(i1 signext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_xor_v4i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_xor_v4i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_xor_v4i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.v4i1(i1 %s, <4 x i1> %v, <4 x i1> %m, i32 %evl) ret i1 %r } @@ -180,17 +249,29 @@ declare i1 @llvm.vp.reduce.or.v8i1(i1, <8 x i1>, <8 x i1>, i32) define signext i1 @vpreduce_or_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_or_v8i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_or_v8i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_or_v8i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.or.v8i1(i1 %s, <8 x i1> 
%v, <8 x i1> %m, i32 %evl) ret i1 %r } @@ -198,16 +279,27 @@ declare i1 @llvm.vp.reduce.xor.v8i1(i1, <8 x i1>, <8 x i1>, i32) define signext i1 @vpreduce_xor_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_xor_v8i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_xor_v8i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_xor_v8i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.v8i1(i1 %s, <8 x i1> %v, <8 x i1> %m, i32 %evl) ret i1 %r } @@ -282,17 +374,29 @@ declare i1 @llvm.vp.reduce.or.v16i1(i1, <16 x i1>, <16 x i1>, i32) define signext i1 @vpreduce_or_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_or_v16i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_or_v16i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_or_v16i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.or.v16i1(i1 %s, <16 x i1> %v, <16 x i1> %m, i32 %evl) ret i1 %r } @@ -300,16 +404,27 @@ declare i1 @llvm.vp.reduce.xor.v16i1(i1, <16 x i1>, <16 x i1>, i32) define signext i1 @vpreduce_xor_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_xor_v16i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_xor_v16i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_xor_v16i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: 
srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.v16i1(i1 %s, <16 x i1> %v, <16 x i1> %m, i32 %evl) ret i1 %r } @@ -317,16 +432,27 @@ declare i1 @llvm.vp.reduce.add.v1i1(i1, <1 x i1>, <1 x i1>, i32) define signext i1 @vpreduce_add_v1i1(i1 signext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_add_v1i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_add_v1i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_add_v1i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.add.v1i1(i1 %s, <1 x i1> %v, <1 x i1> %m, i32 %evl) ret i1 %r } @@ -334,16 +460,27 @@ declare i1 @llvm.vp.reduce.add.v2i1(i1, <2 x i1>, <2 x i1>, i32) define signext i1 @vpreduce_add_v2i1(i1 signext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_add_v2i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_add_v2i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_add_v2i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.add.v2i1(i1 %s, <2 x i1> %v, <2 x i1> %m, i32 %evl) ret i1 %r } @@ -351,16 +488,27 @@ declare i1 @llvm.vp.reduce.add.v4i1(i1, <4 x i1>, <4 x i1>, i32) define signext i1 @vpreduce_add_v4i1(i1 signext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_add_v4i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_add_v4i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_add_v4i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; 
RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.add.v4i1(i1 %s, <4 x i1> %v, <4 x i1> %m, i32 %evl) ret i1 %r } @@ -368,16 +516,27 @@ declare i1 @llvm.vp.reduce.add.v8i1(i1, <8 x i1>, <8 x i1>, i32) define signext i1 @vpreduce_add_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_add_v8i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_add_v8i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_add_v8i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.add.v8i1(i1 %s, <8 x i1> %v, <8 x i1> %m, i32 %evl) ret i1 %r } @@ -385,16 +544,27 @@ declare i1 @llvm.vp.reduce.add.v16i1(i1, <16 x i1>, <16 x i1>, i32) define signext i1 @vpreduce_add_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_add_v16i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_add_v16i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_add_v16i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.add.v16i1(i1 %s, <16 x i1> %v, <16 x i1> %m, i32 %evl) ret i1 %r } @@ -521,17 +691,29 @@ declare i1 @llvm.vp.reduce.smin.v1i1(i1, <1 x i1>, <1 x i1>, i32) define signext i1 @vpreduce_smin_v1i1(i1 signext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_smin_v1i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_smin_v1i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_smin_v1i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, 
v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.v1i1(i1 %s, <1 x i1> %v, <1 x i1> %m, i32 %evl) ret i1 %r } @@ -539,17 +721,29 @@ declare i1 @llvm.vp.reduce.smin.v2i1(i1, <2 x i1>, <2 x i1>, i32) define signext i1 @vpreduce_smin_v2i1(i1 signext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_smin_v2i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_smin_v2i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_smin_v2i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.v2i1(i1 %s, <2 x i1> %v, <2 x i1> %m, i32 %evl) ret i1 %r } @@ -557,17 +751,29 @@ declare i1 @llvm.vp.reduce.smin.v4i1(i1, <4 x i1>, <4 x i1>, i32) define signext i1 @vpreduce_smin_v4i1(i1 signext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_smin_v4i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_smin_v4i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_smin_v4i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.v4i1(i1 %s, <4 x i1> %v, <4 x i1> %m, i32 %evl) ret i1 %r } @@ -575,17 +781,29 @@ declare i1 @llvm.vp.reduce.smin.v8i1(i1, <8 x i1>, <8 x i1>, i32) define signext i1 @vpreduce_smin_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_smin_v8i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_smin_v8i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, 
a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_smin_v8i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.v8i1(i1 %s, <8 x i1> %v, <8 x i1> %m, i32 %evl) ret i1 %r } @@ -593,17 +811,29 @@ declare i1 @llvm.vp.reduce.smin.v16i1(i1, <16 x i1>, <16 x i1>, i32) define signext i1 @vpreduce_smin_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_smin_v16i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_smin_v16i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_smin_v16i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.v16i1(i1 %s, <16 x i1> %v, <16 x i1> %m, i32 %evl) ret i1 %r } @@ -611,17 +841,29 @@ declare i1 @llvm.vp.reduce.smin.v32i1(i1, <32 x i1>, <32 x i1>, i32) define signext i1 @vpreduce_smin_v32i1(i1 signext %s, <32 x i1> %v, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_smin_v32i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_smin_v32i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_smin_v32i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.v32i1(i1 %s, <32 x i1> %v, <32 x i1> %m, i32 %evl) ret i1 %r } @@ -629,17 +871,29 @@ declare i1 @llvm.vp.reduce.smin.v64i1(i1, <64 x i1>, <64 x i1>, i32) define signext i1 @vpreduce_smin_v64i1(i1 signext %s, <64 x i1> %v, <64 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_smin_v64i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret 
+; RV32-LABEL: vpreduce_smin_v64i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_smin_v64i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.v64i1(i1 %s, <64 x i1> %v, <64 x i1> %m, i32 %evl) ret i1 %r } @@ -647,17 +901,29 @@ declare i1 @llvm.vp.reduce.umax.v1i1(i1, <1 x i1>, <1 x i1>, i32) define signext i1 @vpreduce_umax_v1i1(i1 signext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_umax_v1i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_umax_v1i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_umax_v1i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.v1i1(i1 %s, <1 x i1> %v, <1 x i1> %m, i32 %evl) ret i1 %r } @@ -665,17 +931,29 @@ declare i1 @llvm.vp.reduce.umax.v2i1(i1, <2 x i1>, <2 x i1>, i32) define signext i1 @vpreduce_umax_v2i1(i1 signext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_umax_v2i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_umax_v2i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_umax_v2i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.v2i1(i1 %s, <2 x i1> %v, <2 x i1> %m, i32 %evl) ret i1 %r } @@ -683,17 +961,29 @@ declare i1 @llvm.vp.reduce.umax.v4i1(i1, <4 x i1>, <4 x i1>, i32) define signext i1 @vpreduce_umax_v4i1(i1 signext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_umax_v4i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, 
v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_umax_v4i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_umax_v4i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.v4i1(i1 %s, <4 x i1> %v, <4 x i1> %m, i32 %evl) ret i1 %r } @@ -701,17 +991,29 @@ declare i1 @llvm.vp.reduce.umax.v8i1(i1, <8 x i1>, <8 x i1>, i32) define signext i1 @vpreduce_umax_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_umax_v8i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_umax_v8i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_umax_v8i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.v8i1(i1 %s, <8 x i1> %v, <8 x i1> %m, i32 %evl) ret i1 %r } @@ -719,17 +1021,29 @@ declare i1 @llvm.vp.reduce.umax.v16i1(i1, <16 x i1>, <16 x i1>, i32) define signext i1 @vpreduce_umax_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_umax_v16i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_umax_v16i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_umax_v16i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.v16i1(i1 %s, <16 x i1> %v, <16 x i1> %m, i32 %evl) ret i1 %r } @@ -737,17 
+1051,29 @@ declare i1 @llvm.vp.reduce.umax.v32i1(i1, <32 x i1>, <32 x i1>, i32) define signext i1 @vpreduce_umax_v32i1(i1 signext %s, <32 x i1> %v, <32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_umax_v32i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_umax_v32i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_umax_v32i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.v32i1(i1 %s, <32 x i1> %v, <32 x i1> %m, i32 %evl) ret i1 %r } @@ -755,17 +1081,29 @@ declare i1 @llvm.vp.reduce.umax.v64i1(i1, <64 x i1>, <64 x i1>, i32) define signext i1 @vpreduce_umax_v64i1(i1 signext %s, <64 x i1> %v, <64 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_umax_v64i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_umax_v64i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_umax_v64i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.v64i1(i1 %s, <64 x i1> %v, <64 x i1> %m, i32 %evl) ret i1 %r } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll @@ -7,15 +7,25 @@ declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1>) define signext i1 @vreduce_or_v1i1(<1 x i1> %v) { -; CHECK-LABEL: vreduce_or_v1i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; LMULMAX1-LABEL: vreduce_or_v1i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; LMULMAX1-NEXT: vmv.v.i v8, 0 +; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX1-NEXT: vmv.x.s a0, v8 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 +; LMULMAX1-NEXT: ret +; +; LMULMAX8-LABEL: 
vreduce_or_v1i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; LMULMAX8-NEXT: vmv.v.i v8, 0 +; LMULMAX8-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX8-NEXT: vmv.x.s a0, v8 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %v) ret i1 %red } @@ -23,15 +33,25 @@ declare i1 @llvm.vector.reduce.xor.v1i1(<1 x i1>) define signext i1 @vreduce_xor_v1i1(<1 x i1> %v) { -; CHECK-LABEL: vreduce_xor_v1i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; LMULMAX1-LABEL: vreduce_xor_v1i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; LMULMAX1-NEXT: vmv.v.i v8, 0 +; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX1-NEXT: vmv.x.s a0, v8 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 +; LMULMAX1-NEXT: ret +; +; LMULMAX8-LABEL: vreduce_xor_v1i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; LMULMAX8-NEXT: vmv.v.i v8, 0 +; LMULMAX8-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX8-NEXT: vmv.x.s a0, v8 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %v) ret i1 %red } @@ -39,15 +59,25 @@ declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1>) define signext i1 @vreduce_and_v1i1(<1 x i1> %v) { -; CHECK-LABEL: vreduce_and_v1i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; LMULMAX1-LABEL: vreduce_and_v1i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; LMULMAX1-NEXT: vmv.v.i v8, 0 +; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX1-NEXT: vmv.x.s a0, v8 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 +; LMULMAX1-NEXT: ret +; +; LMULMAX8-LABEL: vreduce_and_v1i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; LMULMAX8-NEXT: vmv.v.i v8, 0 +; LMULMAX8-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX8-NEXT: vmv.x.s a0, v8 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %v) ret i1 %red } @@ -55,15 +85,25 @@ declare i1 @llvm.vector.reduce.umax.v1i1(<1 x i1>) define signext i1 @vreduce_umax_v1i1(<1 x i1> %v) { -; CHECK-LABEL: vreduce_umax_v1i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; LMULMAX1-LABEL: vreduce_umax_v1i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; LMULMAX1-NEXT: vmv.v.i v8, 0 +; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX1-NEXT: vmv.x.s a0, v8 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 +; LMULMAX1-NEXT: ret +; +; LMULMAX8-LABEL: vreduce_umax_v1i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; LMULMAX8-NEXT: vmv.v.i v8, 0 +; LMULMAX8-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX8-NEXT: vmv.x.s a0, v8 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 +; 
LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> %v) ret i1 %red } @@ -71,15 +111,25 @@ declare i1 @llvm.vector.reduce.smax.v1i1(<1 x i1>) define signext i1 @vreduce_smax_v1i1(<1 x i1> %v) { -; CHECK-LABEL: vreduce_smax_v1i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; LMULMAX1-LABEL: vreduce_smax_v1i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; LMULMAX1-NEXT: vmv.v.i v8, 0 +; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX1-NEXT: vmv.x.s a0, v8 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 +; LMULMAX1-NEXT: ret +; +; LMULMAX8-LABEL: vreduce_smax_v1i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; LMULMAX8-NEXT: vmv.v.i v8, 0 +; LMULMAX8-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX8-NEXT: vmv.x.s a0, v8 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> %v) ret i1 %red } @@ -87,15 +137,25 @@ declare i1 @llvm.vector.reduce.umin.v1i1(<1 x i1>) define signext i1 @vreduce_umin_v1i1(<1 x i1> %v) { -; CHECK-LABEL: vreduce_umin_v1i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; LMULMAX1-LABEL: vreduce_umin_v1i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; LMULMAX1-NEXT: vmv.v.i v8, 0 +; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX1-NEXT: vmv.x.s a0, v8 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 +; LMULMAX1-NEXT: ret +; +; LMULMAX8-LABEL: vreduce_umin_v1i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; LMULMAX8-NEXT: vmv.v.i v8, 0 +; LMULMAX8-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX8-NEXT: vmv.x.s a0, v8 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> %v) ret i1 %red } @@ -103,15 +163,25 @@ declare i1 @llvm.vector.reduce.smin.v1i1(<1 x i1>) define signext i1 @vreduce_smin_v1i1(<1 x i1> %v) { -; CHECK-LABEL: vreduce_smin_v1i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; LMULMAX1-LABEL: vreduce_smin_v1i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; LMULMAX1-NEXT: vmv.v.i v8, 0 +; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX1-NEXT: vmv.x.s a0, v8 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 +; LMULMAX1-NEXT: ret +; +; LMULMAX8-LABEL: vreduce_smin_v1i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; LMULMAX8-NEXT: vmv.v.i v8, 0 +; LMULMAX8-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX8-NEXT: vmv.x.s a0, v8 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.v1i1(<1 x i1> %v) ret i1 %red } @@ -133,13 +203,21 @@ declare i1 @llvm.vector.reduce.xor.v2i1(<2 x i1>) define signext i1 @vreduce_xor_v2i1(<2 x i1> %v) { -; CHECK-LABEL: vreduce_xor_v2i1: -; CHECK: # 
%bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; LMULMAX1-LABEL: vreduce_xor_v2i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; LMULMAX1-NEXT: vcpop.m a0, v0 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 +; LMULMAX1-NEXT: ret +; +; LMULMAX8-LABEL: vreduce_xor_v2i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; LMULMAX8-NEXT: vcpop.m a0, v0 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %v) ret i1 %red } @@ -234,13 +312,21 @@ declare i1 @llvm.vector.reduce.xor.v4i1(<4 x i1>) define signext i1 @vreduce_xor_v4i1(<4 x i1> %v) { -; CHECK-LABEL: vreduce_xor_v4i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; LMULMAX1-LABEL: vreduce_xor_v4i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; LMULMAX1-NEXT: vcpop.m a0, v0 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 +; LMULMAX1-NEXT: ret +; +; LMULMAX8-LABEL: vreduce_xor_v4i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; LMULMAX8-NEXT: vcpop.m a0, v0 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %v) ret i1 %red } @@ -335,13 +421,21 @@ declare i1 @llvm.vector.reduce.xor.v8i1(<8 x i1>) define signext i1 @vreduce_xor_v8i1(<8 x i1> %v) { -; CHECK-LABEL: vreduce_xor_v8i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; LMULMAX1-LABEL: vreduce_xor_v8i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; LMULMAX1-NEXT: vcpop.m a0, v0 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 +; LMULMAX1-NEXT: ret +; +; LMULMAX8-LABEL: vreduce_xor_v8i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; LMULMAX8-NEXT: vcpop.m a0, v0 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %v) ret i1 %red } @@ -436,13 +530,21 @@ declare i1 @llvm.vector.reduce.xor.v16i1(<16 x i1>) define signext i1 @vreduce_xor_v16i1(<16 x i1> %v) { -; CHECK-LABEL: vreduce_xor_v16i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; LMULMAX1-LABEL: vreduce_xor_v16i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; LMULMAX1-NEXT: vcpop.m a0, v0 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 +; LMULMAX1-NEXT: ret +; +; LMULMAX8-LABEL: vreduce_xor_v16i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; LMULMAX8-NEXT: vcpop.m a0, v0 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %v) ret i1 %red } @@ -552,8 +654,8 @@ ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-NEXT: vmxor.mm v8, v0, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: andi a0, a0, 1 -; LMULMAX1-NEXT: neg a0, a0 +; LMULMAX1-NEXT: slli 
a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_xor_v32i1: @@ -561,8 +663,8 @@ ; LMULMAX8-NEXT: li a0, 32 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: andi a0, a0, 1 -; LMULMAX8-NEXT: neg a0, a0 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %v) ret i1 %red @@ -727,8 +829,8 @@ ; LMULMAX1-NEXT: vmxor.mm v9, v0, v9 ; LMULMAX1-NEXT: vmxor.mm v8, v9, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: andi a0, a0, 1 -; LMULMAX1-NEXT: neg a0, a0 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_xor_v64i1: @@ -736,8 +838,8 @@ ; LMULMAX8-NEXT: li a0, 64 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: andi a0, a0, 1 -; LMULMAX8-NEXT: neg a0, a0 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %v) ret i1 %red @@ -879,15 +981,25 @@ declare i1 @llvm.vector.reduce.add.v1i1(<1 x i1>) define signext i1 @vreduce_add_v1i1(<1 x i1> %v) { -; CHECK-LABEL: vreduce_add_v1i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; LMULMAX1-LABEL: vreduce_add_v1i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; LMULMAX1-NEXT: vmv.v.i v8, 0 +; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX1-NEXT: vmv.x.s a0, v8 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 +; LMULMAX1-NEXT: ret +; +; LMULMAX8-LABEL: vreduce_add_v1i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; LMULMAX8-NEXT: vmv.v.i v8, 0 +; LMULMAX8-NEXT: vmerge.vim v8, v8, 1, v0 +; LMULMAX8-NEXT: vmv.x.s a0, v8 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> %v) ret i1 %red } @@ -895,13 +1007,21 @@ declare i1 @llvm.vector.reduce.add.v2i1(<2 x i1>) define signext i1 @vreduce_add_v2i1(<2 x i1> %v) { -; CHECK-LABEL: vreduce_add_v2i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; LMULMAX1-LABEL: vreduce_add_v2i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; LMULMAX1-NEXT: vcpop.m a0, v0 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 +; LMULMAX1-NEXT: ret +; +; LMULMAX8-LABEL: vreduce_add_v2i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; LMULMAX8-NEXT: vcpop.m a0, v0 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> %v) ret i1 %red } @@ -909,13 +1029,21 @@ declare i1 @llvm.vector.reduce.add.v4i1(<4 x i1>) define signext i1 @vreduce_add_v4i1(<4 x i1> %v) { -; CHECK-LABEL: vreduce_add_v4i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; LMULMAX1-LABEL: vreduce_add_v4i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; LMULMAX1-NEXT: vcpop.m 
a0, v0 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 +; LMULMAX1-NEXT: ret +; +; LMULMAX8-LABEL: vreduce_add_v4i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; LMULMAX8-NEXT: vcpop.m a0, v0 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> %v) ret i1 %red } @@ -923,13 +1051,21 @@ declare i1 @llvm.vector.reduce.add.v8i1(<8 x i1>) define signext i1 @vreduce_add_v8i1(<8 x i1> %v) { -; CHECK-LABEL: vreduce_add_v8i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; LMULMAX1-LABEL: vreduce_add_v8i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; LMULMAX1-NEXT: vcpop.m a0, v0 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 +; LMULMAX1-NEXT: ret +; +; LMULMAX8-LABEL: vreduce_add_v8i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; LMULMAX8-NEXT: vcpop.m a0, v0 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> %v) ret i1 %red } @@ -937,13 +1073,21 @@ declare i1 @llvm.vector.reduce.add.v16i1(<16 x i1>) define signext i1 @vreduce_add_v16i1(<16 x i1> %v) { -; CHECK-LABEL: vreduce_add_v16i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; LMULMAX1-LABEL: vreduce_add_v16i1: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; LMULMAX1-NEXT: vcpop.m a0, v0 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 +; LMULMAX1-NEXT: ret +; +; LMULMAX8-LABEL: vreduce_add_v16i1: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; LMULMAX8-NEXT: vcpop.m a0, v0 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 +; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> %v) ret i1 %red } @@ -956,8 +1100,8 @@ ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-NEXT: vmxor.mm v8, v0, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: andi a0, a0, 1 -; LMULMAX1-NEXT: neg a0, a0 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_add_v32i1: @@ -965,8 +1109,8 @@ ; LMULMAX8-NEXT: li a0, 32 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: andi a0, a0, 1 -; LMULMAX8-NEXT: neg a0, a0 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> %v) ret i1 %red @@ -982,8 +1126,8 @@ ; LMULMAX1-NEXT: vmxor.mm v9, v0, v9 ; LMULMAX1-NEXT: vmxor.mm v8, v9, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: andi a0, a0, 1 -; LMULMAX1-NEXT: neg a0, a0 +; LMULMAX1-NEXT: slli a0, a0, 31 +; LMULMAX1-NEXT: srai a0, a0, 31 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_add_v64i1: @@ -991,8 +1135,8 @@ ; LMULMAX8-NEXT: li a0, 64 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: andi a0, a0, 1 -; LMULMAX8-NEXT: neg a0, a0 +; LMULMAX8-NEXT: slli a0, a0, 63 +; LMULMAX8-NEXT: srai a0, a0, 63 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> %v) ret i1 %red diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s
 
 declare i1 @llvm.vp.reduce.and.nxv1i1(i1, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
 
@@ -22,17 +22,29 @@
 declare i1 @llvm.vp.reduce.or.nxv1i1(i1, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
 
 define signext i1 @vpreduce_or_nxv1i1(i1 signext %s, <vscale x 1 x i1> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_or_nxv1i1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a1, v9, v0.t
-; CHECK-NEXT:    snez a1, a1
-; CHECK-NEXT:    or a0, a1, a0
-; CHECK-NEXT:    andi a0, a0, 1
-; CHECK-NEXT:    neg a0, a0
-; CHECK-NEXT:    ret
+; RV32-LABEL: vpreduce_or_nxv1i1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v9, v0
+; RV32-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vcpop.m a1, v9, v0.t
+; RV32-NEXT:    snez a1, a1
+; RV32-NEXT:    or a0, a1, a0
+; RV32-NEXT:    slli a0, a0, 31
+; RV32-NEXT:    srai a0, a0, 31
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpreduce_or_nxv1i1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v9, v0
+; RV64-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v8
+; RV64-NEXT:    vcpop.m a1, v9, v0.t
+; RV64-NEXT:    snez a1, a1
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    slli a0, a0, 63
+; RV64-NEXT:    srai a0, a0, 63
+; RV64-NEXT:    ret
   %r = call i1 @llvm.vp.reduce.or.nxv1i1(i1 %s, <vscale x 1 x i1> %v, <vscale x 1 x i1> %m, i32 %evl)
   ret i1 %r
 }
@@ -40,16 +52,27 @@
 declare i1 @llvm.vp.reduce.xor.nxv1i1(i1, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
 
 define signext i1 @vpreduce_xor_nxv1i1(i1 signext %s, <vscale x 1 x i1> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_xor_nxv1i1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a1, v9, v0.t
-; CHECK-NEXT:    xor a0, a1, a0
-; CHECK-NEXT:    andi a0, a0, 1
-; CHECK-NEXT:    neg a0, a0
-; CHECK-NEXT:    ret
+; RV32-LABEL: vpreduce_xor_nxv1i1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v9, v0
+; RV32-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vcpop.m a1, v9, v0.t
+; RV32-NEXT:    xor a0, a1, a0
+; RV32-NEXT:    slli a0, a0, 31
+; RV32-NEXT:    srai a0, a0, 31
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpreduce_xor_nxv1i1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v9, v0
+; RV64-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v8
+; RV64-NEXT:    vcpop.m a1, v9, v0.t
+; RV64-NEXT:    xor a0, a1, a0
+; RV64-NEXT:    slli a0, a0, 63
+; RV64-NEXT:    srai a0, a0, 63
+; RV64-NEXT:    ret
   %r = call i1 @llvm.vp.reduce.xor.nxv1i1(i1 %s, <vscale x 1 x i1> %v, <vscale x 1 x i1> %m, i32 %evl)
   ret i1 %r
 }
@@ -74,17 +97,29 @@
 declare i1 @llvm.vp.reduce.or.nxv2i1(i1, <vscale x 2 x i1>, <vscale x 2 x i1>, i32)
 
 define signext i1 @vpreduce_or_nxv2i1(i1 signext %s, <vscale x 2 x i1> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_or_nxv2i1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a1, v9, v0.t
-; CHECK-NEXT:    snez a1, a1
-; CHECK-NEXT:    or a0, a1, a0
-; CHECK-NEXT:    andi a0, a0, 1
-; CHECK-NEXT:    neg a0, a0
-; CHECK-NEXT:    ret
+; RV32-LABEL: vpreduce_or_nxv2i1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v9, v0
+; RV32-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vcpop.m a1, v9, v0.t
+; RV32-NEXT:    snez a1, a1
+; RV32-NEXT:    or a0, a1, a0
+; RV32-NEXT:    slli a0, a0, 31
+; RV32-NEXT:    srai a0, a0, 31
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpreduce_or_nxv2i1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v9, v0
+; RV64-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v8
+; RV64-NEXT:    vcpop.m a1, v9, v0.t
+; RV64-NEXT:    snez a1, a1
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    slli a0, a0, 63
+; RV64-NEXT:    srai a0, a0, 63
+; RV64-NEXT:    ret
   %r = call i1 @llvm.vp.reduce.or.nxv2i1(i1 %s, <vscale x 2 x i1> %v, <vscale x 2 x i1> %m, i32 %evl)
   ret i1 %r
 }
@@ -92,16 +127,27 @@
 declare i1 @llvm.vp.reduce.xor.nxv2i1(i1, <vscale x 2 x i1>, <vscale x 2 x i1>, i32)
 
 define signext i1 @vpreduce_xor_nxv2i1(i1 signext %s, <vscale x 2 x i1> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_xor_nxv2i1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a1, v9, v0.t
-; CHECK-NEXT:    xor a0, a1, a0
-; CHECK-NEXT:    andi a0, a0, 1
-; CHECK-NEXT:    neg a0, a0
-; CHECK-NEXT:    ret
+; RV32-LABEL: vpreduce_xor_nxv2i1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v9, v0
+; RV32-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vcpop.m a1, v9, v0.t
+; RV32-NEXT:    xor a0, a1, a0
+; RV32-NEXT:    slli a0, a0, 31
+; RV32-NEXT:    srai a0, a0, 31
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpreduce_xor_nxv2i1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v9, v0
+; RV64-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v8
+; RV64-NEXT:    vcpop.m a1, v9, v0.t
+; RV64-NEXT:    xor a0, a1, a0
+; RV64-NEXT:    slli a0, a0, 63
+; RV64-NEXT:    srai a0, a0, 63
+; RV64-NEXT:    ret
   %r = call i1 @llvm.vp.reduce.xor.nxv2i1(i1 %s, <vscale x 2 x i1> %v, <vscale x 2 x i1> %m, i32 %evl)
   ret i1 %r
 }
@@ -126,17 +172,29 @@
 declare i1 @llvm.vp.reduce.or.nxv4i1(i1, <vscale x 4 x i1>, <vscale x 4 x i1>, i32)
 
 define signext i1 @vpreduce_or_nxv4i1(i1 signext %s, <vscale x 4 x i1> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_or_nxv4i1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a1, v9, v0.t
-; CHECK-NEXT:    snez a1, a1
-; CHECK-NEXT:    or a0, a1, a0
-; CHECK-NEXT:    andi a0, a0, 1
-; CHECK-NEXT:    neg a0, a0
-; CHECK-NEXT:    ret
+; RV32-LABEL: vpreduce_or_nxv4i1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v9, v0
+; RV32-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vcpop.m a1, v9, v0.t
+; RV32-NEXT:    snez a1, a1
+; RV32-NEXT:    or a0, a1, a0
+; RV32-NEXT:    slli a0, a0, 31
+; RV32-NEXT:    srai a0, a0, 31
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpreduce_or_nxv4i1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v9, v0
+; RV64-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v8
+; RV64-NEXT:    vcpop.m a1, v9, v0.t
+; RV64-NEXT:    snez a1, a1
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    slli a0, a0, 63
+; RV64-NEXT:    srai a0, a0, 63
+; RV64-NEXT:    ret
   %r = call i1 @llvm.vp.reduce.or.nxv4i1(i1 %s, <vscale x 4 x i1> %v, <vscale x 4 x i1> %m, i32 %evl)
   ret i1 %r
 }
@@ -144,16 +202,27 @@
 declare i1 @llvm.vp.reduce.xor.nxv4i1(i1, <vscale x 4 x i1>, <vscale x 4 x i1>, i32)
 
 define signext i1 @vpreduce_xor_nxv4i1(i1 signext %s, <vscale x 4 x i1> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_xor_nxv4i1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a1, v9, v0.t
-; CHECK-NEXT:    xor a0, a1, a0
-; CHECK-NEXT:    andi a0, a0, 1
-; CHECK-NEXT:    neg
a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_xor_nxv4i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_xor_nxv4i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.nxv4i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -178,17 +247,29 @@ declare i1 @llvm.vp.reduce.or.nxv8i1(i1, , , i32) define signext i1 @vpreduce_or_nxv8i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_or_nxv8i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_or_nxv8i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_or_nxv8i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.or.nxv8i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -196,16 +277,27 @@ declare i1 @llvm.vp.reduce.xor.nxv8i1(i1, , , i32) define signext i1 @vpreduce_xor_nxv8i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_xor_nxv8i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_xor_nxv8i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_xor_nxv8i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.nxv8i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -230,17 +322,29 @@ declare i1 @llvm.vp.reduce.or.nxv16i1(i1, , , i32) define signext i1 @vpreduce_or_nxv16i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_or_nxv16i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; 
RV32-LABEL: vpreduce_or_nxv16i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_or_nxv16i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.or.nxv16i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -248,16 +352,27 @@ declare i1 @llvm.vp.reduce.xor.nxv16i1(i1, , , i32) define signext i1 @vpreduce_xor_nxv16i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_xor_nxv16i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_xor_nxv16i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_xor_nxv16i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.nxv16i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -282,17 +397,29 @@ declare i1 @llvm.vp.reduce.or.nxv32i1(i1, , , i32) define signext i1 @vpreduce_or_nxv32i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_or_nxv32i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_or_nxv32i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_or_nxv32i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.or.nxv32i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -300,16 +427,27 @@ declare i1 @llvm.vp.reduce.xor.nxv32i1(i1, , , i32) define signext i1 @vpreduce_xor_nxv32i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_xor_nxv32i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: 
ret +; RV32-LABEL: vpreduce_xor_nxv32i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_xor_nxv32i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.nxv32i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -317,17 +455,29 @@ declare i1 @llvm.vp.reduce.or.nxv40i1(i1, , , i32) define signext i1 @vpreduce_or_nxv40i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_or_nxv40i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_or_nxv40i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_or_nxv40i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.or.nxv40i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -352,17 +502,29 @@ declare i1 @llvm.vp.reduce.or.nxv64i1(i1, , , i32) define signext i1 @vpreduce_or_nxv64i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_or_nxv64i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_or_nxv64i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_or_nxv64i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.or.nxv64i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -370,16 +532,27 @@ declare i1 @llvm.vp.reduce.xor.nxv64i1(i1, , , i32) define signext i1 @vpreduce_xor_nxv64i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_xor_nxv64i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; 
CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_xor_nxv64i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_xor_nxv64i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.xor.nxv64i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -387,32 +560,59 @@ declare i1 @llvm.vp.reduce.or.nxv128i1(i1, , , i32) define signext i1 @vpreduce_or_nxv128i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_or_nxv128i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: sub a3, a1, a2 -; CHECK-NEXT: sltu a4, a1, a3 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a3, a4, a3 -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vcpop.m a3, v8, v0.t -; CHECK-NEXT: snez a3, a3 -; CHECK-NEXT: bltu a1, a2, .LBB22_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB22_2: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vcpop.m a1, v11, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: or a0, a3, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_or_nxv128i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: sub a3, a1, a2 +; RV32-NEXT: sltu a4, a1, a3 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: and a3, a4, a3 +; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vcpop.m a3, v8, v0.t +; RV32-NEXT: snez a3, a3 +; RV32-NEXT: bltu a1, a2, .LBB22_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: .LBB22_2: +; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v9 +; RV32-NEXT: vcpop.m a1, v11, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: or a0, a3, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_or_nxv128i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub a3, a1, a2 +; RV64-NEXT: sltu a4, a1, a3 +; RV64-NEXT: addi a4, a4, -1 +; RV64-NEXT: and a3, a4, a3 +; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vcpop.m a3, v8, v0.t +; RV64-NEXT: snez a3, a3 +; RV64-NEXT: bltu a1, a2, .LBB22_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, a2 +; RV64-NEXT: .LBB22_2: +; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vcpop.m a1, v11, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: or a0, a3, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.or.nxv128i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -420,16 +620,27 @@ declare i1 @llvm.vp.reduce.add.nxv1i1(i1, , , i32) define signext i1 @vpreduce_add_nxv1i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_add_nxv1i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v 
v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_add_nxv1i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_add_nxv1i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.add.nxv1i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -437,16 +648,27 @@ declare i1 @llvm.vp.reduce.add.nxv2i1(i1, , , i32) define signext i1 @vpreduce_add_nxv2i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_add_nxv2i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_add_nxv2i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_add_nxv2i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.add.nxv2i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -454,16 +676,27 @@ declare i1 @llvm.vp.reduce.add.nxv4i1(i1, , , i32) define signext i1 @vpreduce_add_nxv4i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_add_nxv4i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_add_nxv4i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_add_nxv4i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.add.nxv4i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -471,16 +704,27 @@ declare i1 @llvm.vp.reduce.add.nxv8i1(i1, , , i32) define signext i1 @vpreduce_add_nxv8i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_add_nxv8i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, 
v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_add_nxv8i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_add_nxv8i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.add.nxv8i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -488,16 +732,27 @@ declare i1 @llvm.vp.reduce.add.nxv16i1(i1, , , i32) define signext i1 @vpreduce_add_nxv16i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_add_nxv16i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_add_nxv16i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_add_nxv16i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.add.nxv16i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -505,16 +760,27 @@ declare i1 @llvm.vp.reduce.add.nxv32i1(i1, , , i32) define signext i1 @vpreduce_add_nxv32i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_add_nxv32i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_add_nxv32i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_add_nxv32i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.add.nxv32i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -522,16 +788,27 @@ declare i1 @llvm.vp.reduce.add.nxv64i1(i1, , , i32) define signext i1 @vpreduce_add_nxv64i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_add_nxv64i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; 
RV32-LABEL: vpreduce_add_nxv64i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_add_nxv64i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.add.nxv64i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -659,17 +936,29 @@ declare i1 @llvm.vp.reduce.smin.nxv1i1(i1, , , i32) define signext i1 @vpreduce_smin_nxv1i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_smin_nxv1i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_smin_nxv1i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_smin_nxv1i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.nxv1i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -677,17 +966,29 @@ declare i1 @llvm.vp.reduce.smin.nxv2i1(i1, , , i32) define signext i1 @vpreduce_smin_nxv2i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_smin_nxv2i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_smin_nxv2i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_smin_nxv2i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.nxv2i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -695,17 +996,29 @@ declare i1 @llvm.vp.reduce.smin.nxv4i1(i1, , , i32) define signext i1 @vpreduce_smin_nxv4i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_smin_nxv4i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; 
CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_smin_nxv4i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_smin_nxv4i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.nxv4i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -713,17 +1026,29 @@ declare i1 @llvm.vp.reduce.smin.nxv8i1(i1, , , i32) define signext i1 @vpreduce_smin_nxv8i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_smin_nxv8i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_smin_nxv8i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_smin_nxv8i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.nxv8i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -731,17 +1056,29 @@ declare i1 @llvm.vp.reduce.smin.nxv16i1(i1, , , i32) define signext i1 @vpreduce_smin_nxv16i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_smin_nxv16i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_smin_nxv16i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_smin_nxv16i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.nxv16i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -749,17 +1086,29 @@ declare i1 @llvm.vp.reduce.smin.nxv32i1(i1, , , i32) define signext i1 @vpreduce_smin_nxv32i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_smin_nxv32i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, 
ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_smin_nxv32i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_smin_nxv32i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.nxv32i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -767,17 +1116,29 @@ declare i1 @llvm.vp.reduce.smin.nxv64i1(i1, , , i32) define signext i1 @vpreduce_smin_nxv64i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_smin_nxv64i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_smin_nxv64i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_smin_nxv64i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.smin.nxv64i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -785,17 +1146,29 @@ declare i1 @llvm.vp.reduce.umax.nxv1i1(i1, , , i32) define signext i1 @vpreduce_umax_nxv1i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_umax_nxv1i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_umax_nxv1i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_umax_nxv1i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.nxv1i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -803,17 +1176,29 @@ declare i1 @llvm.vp.reduce.umax.nxv2i1(i1, , , i32) define signext i1 @vpreduce_umax_nxv2i1(i1 signext %s, %v, %m, i32 zeroext 
%evl) { -; CHECK-LABEL: vpreduce_umax_nxv2i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_umax_nxv2i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_umax_nxv2i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.nxv2i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -821,17 +1206,29 @@ declare i1 @llvm.vp.reduce.umax.nxv4i1(i1, , , i32) define signext i1 @vpreduce_umax_nxv4i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_umax_nxv4i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_umax_nxv4i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_umax_nxv4i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.nxv4i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -839,17 +1236,29 @@ declare i1 @llvm.vp.reduce.umax.nxv8i1(i1, , , i32) define signext i1 @vpreduce_umax_nxv8i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_umax_nxv8i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_umax_nxv8i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_umax_nxv8i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.nxv8i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -857,17 +1266,29 @@ 
declare i1 @llvm.vp.reduce.umax.nxv16i1(i1, , , i32) define signext i1 @vpreduce_umax_nxv16i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_umax_nxv16i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_umax_nxv16i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_umax_nxv16i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.nxv16i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -875,17 +1296,29 @@ declare i1 @llvm.vp.reduce.umax.nxv32i1(i1, , , i32) define signext i1 @vpreduce_umax_nxv32i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_umax_nxv32i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_umax_nxv32i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_umax_nxv32i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.nxv32i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } @@ -893,17 +1326,29 @@ declare i1 @llvm.vp.reduce.umax.nxv64i1(i1, , , i32) define signext i1 @vpreduce_umax_nxv64i1(i1 signext %s, %v, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_umax_nxv64i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a1, v9, v0.t -; CHECK-NEXT: snez a1, a1 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: vpreduce_umax_nxv64i1: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vcpop.m a1, v9, v0.t +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: slli a0, a0, 31 +; RV32-NEXT: srai a0, a0, 31 +; RV32-NEXT: ret +; +; RV64-LABEL: vpreduce_umax_nxv64i1: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vcpop.m a1, v9, v0.t +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a0, a0, 63 +; 
RV64-NEXT: srai a0, a0, 63 +; RV64-NEXT: ret %r = call i1 @llvm.vp.reduce.umax.nxv64i1(i1 %s, %v, %m, i32 %evl) ret i1 %r } diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll @@ -19,13 +19,6 @@ declare i1 @llvm.vector.reduce.xor.nxv1i1() define signext i1 @vreduce_xor_nxv1i1( %v) { -; CHECK-LABEL: vreduce_xor_nxv1i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.nxv1i1( %v) ret i1 %red } @@ -120,13 +113,6 @@ declare i1 @llvm.vector.reduce.xor.nxv2i1() define signext i1 @vreduce_xor_nxv2i1( %v) { -; CHECK-LABEL: vreduce_xor_nxv2i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.nxv2i1( %v) ret i1 %red } @@ -221,13 +207,6 @@ declare i1 @llvm.vector.reduce.xor.nxv4i1() define signext i1 @vreduce_xor_nxv4i1( %v) { -; CHECK-LABEL: vreduce_xor_nxv4i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.nxv4i1( %v) ret i1 %red } @@ -322,13 +301,6 @@ declare i1 @llvm.vector.reduce.xor.nxv8i1() define signext i1 @vreduce_xor_nxv8i1( %v) { -; CHECK-LABEL: vreduce_xor_nxv8i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.nxv8i1( %v) ret i1 %red } @@ -423,13 +395,6 @@ declare i1 @llvm.vector.reduce.xor.nxv16i1() define signext i1 @vreduce_xor_nxv16i1( %v) { -; CHECK-LABEL: vreduce_xor_nxv16i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.nxv16i1( %v) ret i1 %red } @@ -524,13 +489,6 @@ declare i1 @llvm.vector.reduce.xor.nxv32i1() define signext i1 @vreduce_xor_nxv32i1( %v) { -; CHECK-LABEL: vreduce_xor_nxv32i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.nxv32i1( %v) ret i1 %red } @@ -625,13 +583,6 @@ declare i1 @llvm.vector.reduce.xor.nxv64i1() define signext i1 @vreduce_xor_nxv64i1( %v) { -; CHECK-LABEL: vreduce_xor_nxv64i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.xor.nxv64i1( %v) ret i1 %red } @@ -712,13 +663,6 @@ declare i1 @llvm.vector.reduce.add.nxv1i1() define signext i1 @vreduce_add_nxv1i1( %v) { -; CHECK-LABEL: vreduce_add_nxv1i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.nxv1i1( %v) ret i1 %red } @@ -726,13 +670,6 @@ declare i1 @llvm.vector.reduce.add.nxv2i1() define signext i1 @vreduce_add_nxv2i1( %v) { -; CHECK-LABEL: 
vreduce_add_nxv2i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.nxv2i1( %v) ret i1 %red } @@ -740,13 +677,6 @@ declare i1 @llvm.vector.reduce.add.nxv4i1() define signext i1 @vreduce_add_nxv4i1( %v) { -; CHECK-LABEL: vreduce_add_nxv4i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.nxv4i1( %v) ret i1 %red } @@ -754,13 +684,6 @@ declare i1 @llvm.vector.reduce.add.nxv8i1() define signext i1 @vreduce_add_nxv8i1( %v) { -; CHECK-LABEL: vreduce_add_nxv8i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.nxv8i1( %v) ret i1 %red } @@ -768,13 +691,6 @@ declare i1 @llvm.vector.reduce.add.nxv16i1() define signext i1 @vreduce_add_nxv16i1( %v) { -; CHECK-LABEL: vreduce_add_nxv16i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.nxv16i1( %v) ret i1 %red } @@ -782,13 +698,6 @@ declare i1 @llvm.vector.reduce.add.nxv32i1() define signext i1 @vreduce_add_nxv32i1( %v) { -; CHECK-LABEL: vreduce_add_nxv32i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.nxv32i1( %v) ret i1 %red } @@ -796,13 +705,6 @@ declare i1 @llvm.vector.reduce.add.nxv64i1() define signext i1 @vreduce_add_nxv64i1( %v) { -; CHECK-LABEL: vreduce_add_nxv64i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.add.nxv64i1( %v) ret i1 %red } diff --git a/llvm/test/CodeGen/RISCV/select.ll b/llvm/test/CodeGen/RISCV/select.ll --- a/llvm/test/CodeGen/RISCV/select.ll +++ b/llvm/test/CodeGen/RISCV/select.ll @@ -6,16 +6,16 @@ define i16 @select_xor_1(i16 %A, i8 %cond) { ; RV32-LABEL: select_xor_1: ; RV32: # %bb.0: # %entry -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: neg a1, a1 +; RV32-NEXT: slli a1, a1, 31 +; RV32-NEXT: srai a1, a1, 31 ; RV32-NEXT: andi a1, a1, 43 ; RV32-NEXT: xor a0, a1, a0 ; RV32-NEXT: ret ; ; NOCONDOPS-LABEL: select_xor_1: ; NOCONDOPS: # %bb.0: # %entry -; NOCONDOPS-NEXT: andi a1, a1, 1 -; NOCONDOPS-NEXT: negw a1, a1 +; NOCONDOPS-NEXT: slli a1, a1, 63 +; NOCONDOPS-NEXT: srai a1, a1, 63 ; NOCONDOPS-NEXT: andi a1, a1, 43 ; NOCONDOPS-NEXT: xor a0, a1, a0 ; NOCONDOPS-NEXT: ret @@ -41,16 +41,16 @@ define i16 @select_xor_1b(i16 %A, i8 %cond) { ; RV32-LABEL: select_xor_1b: ; RV32: # %bb.0: # %entry -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: neg a1, a1 +; RV32-NEXT: slli a1, a1, 31 +; RV32-NEXT: srai a1, a1, 31 ; RV32-NEXT: andi a1, a1, 43 ; RV32-NEXT: xor a0, a1, a0 ; RV32-NEXT: ret ; ; NOCONDOPS-LABEL: select_xor_1b: ; NOCONDOPS: # %bb.0: # %entry -; NOCONDOPS-NEXT: andi a1, a1, 1 -; NOCONDOPS-NEXT: negw a1, a1 +; NOCONDOPS-NEXT: slli a1, a1, 63 +; NOCONDOPS-NEXT: srai a1, a1, 63 ; NOCONDOPS-NEXT: andi a1, a1, 43 ; NOCONDOPS-NEXT: xor a0, a1, a0 ; NOCONDOPS-NEXT: ret @@ -73,16 
+73,16 @@ define i32 @select_xor_2(i32 %A, i32 %B, i8 %cond) { ; RV32-LABEL: select_xor_2: ; RV32: # %bb.0: # %entry -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: neg a2, a2 +; RV32-NEXT: slli a2, a2, 31 +; RV32-NEXT: srai a2, a2, 31 ; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: xor a0, a1, a0 ; RV32-NEXT: ret ; ; NOCONDOPS-LABEL: select_xor_2: ; NOCONDOPS: # %bb.0: # %entry -; NOCONDOPS-NEXT: andi a2, a2, 1 -; NOCONDOPS-NEXT: neg a2, a2 +; NOCONDOPS-NEXT: slli a2, a2, 63 +; NOCONDOPS-NEXT: srai a2, a2, 63 ; NOCONDOPS-NEXT: and a1, a2, a1 ; NOCONDOPS-NEXT: xor a0, a1, a0 ; NOCONDOPS-NEXT: ret @@ -107,16 +107,16 @@ define i32 @select_xor_2b(i32 %A, i32 %B, i8 %cond) { ; RV32-LABEL: select_xor_2b: ; RV32: # %bb.0: # %entry -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: neg a2, a2 +; RV32-NEXT: slli a2, a2, 31 +; RV32-NEXT: srai a2, a2, 31 ; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: xor a0, a1, a0 ; RV32-NEXT: ret ; ; NOCONDOPS-LABEL: select_xor_2b: ; NOCONDOPS: # %bb.0: # %entry -; NOCONDOPS-NEXT: andi a2, a2, 1 -; NOCONDOPS-NEXT: neg a2, a2 +; NOCONDOPS-NEXT: slli a2, a2, 63 +; NOCONDOPS-NEXT: srai a2, a2, 63 ; NOCONDOPS-NEXT: and a1, a2, a1 ; NOCONDOPS-NEXT: xor a0, a1, a0 ; NOCONDOPS-NEXT: ret @@ -138,16 +138,16 @@ define i32 @select_or(i32 %A, i32 %B, i8 %cond) { ; RV32-LABEL: select_or: ; RV32: # %bb.0: # %entry -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: neg a2, a2 +; RV32-NEXT: slli a2, a2, 31 +; RV32-NEXT: srai a2, a2, 31 ; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: ret ; ; NOCONDOPS-LABEL: select_or: ; NOCONDOPS: # %bb.0: # %entry -; NOCONDOPS-NEXT: andi a2, a2, 1 -; NOCONDOPS-NEXT: neg a2, a2 +; NOCONDOPS-NEXT: slli a2, a2, 63 +; NOCONDOPS-NEXT: srai a2, a2, 63 ; NOCONDOPS-NEXT: and a1, a2, a1 ; NOCONDOPS-NEXT: or a0, a1, a0 ; NOCONDOPS-NEXT: ret @@ -172,16 +172,16 @@ define i32 @select_or_b(i32 %A, i32 %B, i8 %cond) { ; RV32-LABEL: select_or_b: ; RV32: # %bb.0: # %entry -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: neg a2, a2 +; RV32-NEXT: slli a2, a2, 31 +; RV32-NEXT: srai a2, a2, 31 ; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: ret ; ; NOCONDOPS-LABEL: select_or_b: ; NOCONDOPS: # %bb.0: # %entry -; NOCONDOPS-NEXT: andi a2, a2, 1 -; NOCONDOPS-NEXT: neg a2, a2 +; NOCONDOPS-NEXT: slli a2, a2, 63 +; NOCONDOPS-NEXT: srai a2, a2, 63 ; NOCONDOPS-NEXT: and a1, a2, a1 ; NOCONDOPS-NEXT: or a0, a1, a0 ; NOCONDOPS-NEXT: ret @@ -203,16 +203,16 @@ define i32 @select_or_1(i32 %A, i32 %B, i32 %cond) { ; RV32-LABEL: select_or_1: ; RV32: # %bb.0: # %entry -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: neg a2, a2 +; RV32-NEXT: slli a2, a2, 31 +; RV32-NEXT: srai a2, a2, 31 ; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: ret ; ; NOCONDOPS-LABEL: select_or_1: ; NOCONDOPS: # %bb.0: # %entry -; NOCONDOPS-NEXT: andi a2, a2, 1 -; NOCONDOPS-NEXT: neg a2, a2 +; NOCONDOPS-NEXT: slli a2, a2, 63 +; NOCONDOPS-NEXT: srai a2, a2, 63 ; NOCONDOPS-NEXT: and a1, a2, a1 ; NOCONDOPS-NEXT: or a0, a1, a0 ; NOCONDOPS-NEXT: ret @@ -237,16 +237,16 @@ define i32 @select_or_1b(i32 %A, i32 %B, i32 %cond) { ; RV32-LABEL: select_or_1b: ; RV32: # %bb.0: # %entry -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: neg a2, a2 +; RV32-NEXT: slli a2, a2, 31 +; RV32-NEXT: srai a2, a2, 31 ; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: ret ; ; NOCONDOPS-LABEL: select_or_1b: ; NOCONDOPS: # %bb.0: # %entry -; NOCONDOPS-NEXT: andi a2, a2, 1 -; NOCONDOPS-NEXT: neg a2, a2 +; NOCONDOPS-NEXT: slli a2, a2, 63 +; NOCONDOPS-NEXT: srai a2, a2, 63 ; NOCONDOPS-NEXT: and a1, a2, a1 ; 
NOCONDOPS-NEXT: or a0, a1, a0
; NOCONDOPS-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll
--- a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll
+++ b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll
@@ -216,12 +216,19 @@
}

define i8 @sel_shift_bool_i8(i1 %t) {
-; CHECK-LABEL: sel_shift_bool_i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: neg a0, a0
-; CHECK-NEXT: andi a0, a0, -128
-; CHECK-NEXT: ret
+; RV32-LABEL: sel_shift_bool_i8:
+; RV32: # %bb.0:
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: andi a0, a0, -128
+; RV32-NEXT: ret
+;
+; RV64-LABEL: sel_shift_bool_i8:
+; RV64: # %bb.0:
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: andi a0, a0, -128
+; RV64-NEXT: ret
%shl = select i1 %t, i8 128, i8 0
ret i8 %shl
}
diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
--- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
+++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
@@ -7,14 +7,14 @@
define i8 @sext_i1_to_i8(i1 %a) nounwind {
; RV32I-LABEL: sext_i1_to_i8:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a0, a0, 1
-; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: slli a0, a0, 31
+; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: sext_i1_to_i8:
; RV64I: # %bb.0:
-; RV64I-NEXT: andi a0, a0, 1
-; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: slli a0, a0, 63
+; RV64I-NEXT: srai a0, a0, 63
; RV64I-NEXT: ret
%1 = sext i1 %a to i8
ret i8 %1
@@ -23,14 +23,14 @@
define i16 @sext_i1_to_i16(i1 %a) nounwind {
; RV32I-LABEL: sext_i1_to_i16:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a0, a0, 1
-; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: slli a0, a0, 31
+; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: sext_i1_to_i16:
; RV64I: # %bb.0:
-; RV64I-NEXT: andi a0, a0, 1
-; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: slli a0, a0, 63
+; RV64I-NEXT: srai a0, a0, 63
; RV64I-NEXT: ret
%1 = sext i1 %a to i16
ret i16 %1
@@ -39,14 +39,14 @@
define i32 @sext_i1_to_i32(i1 %a) nounwind {
; RV32I-LABEL: sext_i1_to_i32:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a0, a0, 1
-; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: slli a0, a0, 31
+; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: ret
;
; RV64I-LABEL: sext_i1_to_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: andi a0, a0, 1
-; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: slli a0, a0, 63
+; RV64I-NEXT: srai a0, a0, 63
; RV64I-NEXT: ret
%1 = sext i1 %a to i32
ret i32 %1
@@ -55,15 +55,15 @@
define i64 @sext_i1_to_i64(i1 %a) nounwind {
; RV32I-LABEL: sext_i1_to_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a0, a0, 1
-; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: slli a0, a0, 31
+; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: mv a1, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: sext_i1_to_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: andi a0, a0, 1
-; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: slli a0, a0, 63
+; RV64I-NEXT: srai a0, a0, 63
; RV64I-NEXT: ret
%1 = sext i1 %a to i64
ret i64 %1
diff --git a/llvm/test/CodeGen/RISCV/short-foward-branch-opt.ll b/llvm/test/CodeGen/RISCV/short-foward-branch-opt.ll
--- a/llvm/test/CodeGen/RISCV/short-foward-branch-opt.ll
+++ b/llvm/test/CodeGen/RISCV/short-foward-branch-opt.ll
@@ -171,8 +171,8 @@
define i16 @select_xor_1(i16 %A, i8 %cond) {
; NOSFB-LABEL: select_xor_1:
; NOSFB: # %bb.0: # %entry
-; NOSFB-NEXT: andi a1, a1, 1
-; NOSFB-NEXT: negw a1, a1
+; NOSFB-NEXT: slli a1, a1, 63
+; NOSFB-NEXT: srai a1, a1, 63
; NOSFB-NEXT: andi a1, a1, 43
; NOSFB-NEXT: xor a0, a0, a1
; NOSFB-NEXT: ret
@@ -199,8 +199,8 @@
define i16 @select_xor_1b(i16 %A, i8 %cond) {
; NOSFB-LABEL: select_xor_1b:
; NOSFB: # %bb.0: # %entry
-; NOSFB-NEXT: andi a1, a1, 1
-; NOSFB-NEXT: negw a1, a1
+; NOSFB-NEXT: slli a1, a1, 63
+; NOSFB-NEXT: srai a1, a1, 63
; NOSFB-NEXT: andi a1, a1, 43
; NOSFB-NEXT: xor a0, a0, a1
; NOSFB-NEXT: ret
@@ -225,8 +225,8 @@
define i32 @select_xor_2(i32 %A, i32 %B, i8 %cond) {
; NOSFB-LABEL: select_xor_2:
; NOSFB: # %bb.0: # %entry
-; NOSFB-NEXT: andi a2, a2, 1
-; NOSFB-NEXT: neg a2, a2
+; NOSFB-NEXT: slli a2, a2, 63
+; NOSFB-NEXT: srai a2, a2, 63
; NOSFB-NEXT: and a1, a1, a2
; NOSFB-NEXT: xor a0, a0, a1
; NOSFB-NEXT: ret
@@ -252,8 +252,8 @@
define i32 @select_xor_2b(i32 %A, i32 %B, i8 %cond) {
; NOSFB-LABEL: select_xor_2b:
; NOSFB: # %bb.0: # %entry
-; NOSFB-NEXT: andi a2, a2, 1
-; NOSFB-NEXT: neg a2, a2
+; NOSFB-NEXT: slli a2, a2, 63
+; NOSFB-NEXT: srai a2, a2, 63
; NOSFB-NEXT: and a1, a1, a2
; NOSFB-NEXT: xor a0, a0, a1
; NOSFB-NEXT: ret
@@ -277,8 +277,8 @@
define i32 @select_or(i32 %A, i32 %B, i8 %cond) {
; NOSFB-LABEL: select_or:
; NOSFB: # %bb.0: # %entry
-; NOSFB-NEXT: andi a2, a2, 1
-; NOSFB-NEXT: neg a2, a2
+; NOSFB-NEXT: slli a2, a2, 63
+; NOSFB-NEXT: srai a2, a2, 63
; NOSFB-NEXT: and a1, a1, a2
; NOSFB-NEXT: or a0, a0, a1
; NOSFB-NEXT: ret
@@ -304,8 +304,8 @@
define i32 @select_or_b(i32 %A, i32 %B, i8 %cond) {
; NOSFB-LABEL: select_or_b:
; NOSFB: # %bb.0: # %entry
-; NOSFB-NEXT: andi a2, a2, 1
-; NOSFB-NEXT: neg a2, a2
+; NOSFB-NEXT: slli a2, a2, 63
+; NOSFB-NEXT: srai a2, a2, 63
; NOSFB-NEXT: and a1, a1, a2
; NOSFB-NEXT: or a0, a0, a1
; NOSFB-NEXT: ret
@@ -329,8 +329,8 @@
define i32 @select_or_1(i32 %A, i32 %B, i32 %cond) {
; NOSFB-LABEL: select_or_1:
; NOSFB: # %bb.0: # %entry
-; NOSFB-NEXT: andi a2, a2, 1
-; NOSFB-NEXT: neg a2, a2
+; NOSFB-NEXT: slli a2, a2, 63
+; NOSFB-NEXT: srai a2, a2, 63
; NOSFB-NEXT: and a1, a1, a2
; NOSFB-NEXT: or a0, a0, a1
; NOSFB-NEXT: ret
@@ -356,8 +356,8 @@
define i32 @select_or_1b(i32 %A, i32 %B, i32 %cond) {
; NOSFB-LABEL: select_or_1b:
; NOSFB: # %bb.0: # %entry
-; NOSFB-NEXT: andi a2, a2, 1
-; NOSFB-NEXT: neg a2, a2
+; NOSFB-NEXT: slli a2, a2, 63
+; NOSFB-NEXT: srai a2, a2, 63
; NOSFB-NEXT: and a1, a1, a2
; NOSFB-NEXT: or a0, a0, a1
; NOSFB-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -310,64 +310,64 @@
; RV32-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lw a0, 4(a0)
-; RV32-NEXT: lb a1, 12(s0)
-; RV32-NEXT: lw a2, 8(s0)
-; RV32-NEXT: andi a3, a0, 1
-; RV32-NEXT: neg s1, a3
-; RV32-NEXT: slli a3, a1, 30
-; RV32-NEXT: srli a4, a2, 2
-; RV32-NEXT: or s2, a4, a3
-; RV32-NEXT: slli a1, a1, 29
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: neg s3, a1
-; RV32-NEXT: slli a1, a2, 31
-; RV32-NEXT: srli a0, a0, 1
-; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: lw s4, 0(s0)
-; RV32-NEXT: slli a2, a2, 30
-; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: neg a1, a2
-; RV32-NEXT: li a2, 7
+; RV32-NEXT: lb a0, 12(a0)
+; RV32-NEXT: lw a1, 8(s0)
+; RV32-NEXT: slli a2, a0, 30
+; RV32-NEXT: lw a3, 4(s0)
+; RV32-NEXT: srli s1, a1, 2
+; RV32-NEXT: or s1, s1, a2
+; RV32-NEXT: slli a2, a1, 31
+; RV32-NEXT: srli a4, a3, 1
+; RV32-NEXT: or s2, a4, a2
+; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: srai s3, a0, 31
+; RV32-NEXT: srli a1, a1, 1
+; RV32-NEXT: slli a1, a1, 31
+; RV32-NEXT: lw a0, 0(s0)
+; RV32-NEXT: srai s4, a1, 31
+; RV32-NEXT: slli a1, a3, 31
+; RV32-NEXT: srai a1, a1, 31
+; RV32-NEXT: li a2, 6
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __moddi3@plt
; RV32-NEXT: mv s5, a0
; RV32-NEXT: mv s6, a1
-; RV32-NEXT: li a2, -5
-; RV32-NEXT: li a3, -1
+; RV32-NEXT: li a2, 7
; RV32-NEXT: mv a0, s2
-; RV32-NEXT: mv a1, s3
+; RV32-NEXT: mv a1, s4
+; RV32-NEXT: li a3, 0
; RV32-NEXT: call __moddi3@plt
; RV32-NEXT: mv s2, a0
-; RV32-NEXT: mv s3, a1
-; RV32-NEXT: li a2, 6
-; RV32-NEXT: mv a0, s4
-; RV32-NEXT: mv a1, s1
-; RV32-NEXT: li a3, 0
+; RV32-NEXT: mv s4, a1
+; RV32-NEXT: li a2, -5
+; RV32-NEXT: li a3, -1
+; RV32-NEXT: mv a0, s1
+; RV32-NEXT: mv a1, s3
; RV32-NEXT: call __moddi3@plt
-; RV32-NEXT: xori a2, s2, 2
-; RV32-NEXT: or a2, a2, s3
-; RV32-NEXT: seqz a2, a2
-; RV32-NEXT: xori a3, s5, 1
-; RV32-NEXT: or a3, a3, s6
-; RV32-NEXT: seqz a3, a3
+; RV32-NEXT: or a2, s5, s6
+; RV32-NEXT: snez a2, a2
+; RV32-NEXT: xori a0, a0, 2
; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: snez a0, a0
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: neg a1, a0
-; RV32-NEXT: sw a1, 0(s0)
-; RV32-NEXT: andi a1, a2, 7
-; RV32-NEXT: sb a1, 12(s0)
-; RV32-NEXT: slli a1, a3, 1
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: xori a1, s2, 1
+; RV32-NEXT: or a1, a1, s4
+; RV32-NEXT: seqz a1, a1
+; RV32-NEXT: neg a3, a2
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: sw a3, 0(s0)
+; RV32-NEXT: andi a3, a0, 7
+; RV32-NEXT: sb a3, 12(s0)
+; RV32-NEXT: slli a3, a1, 1
+; RV32-NEXT: or a2, a3, a2
+; RV32-NEXT: sw a2, 4(s0)
+; RV32-NEXT: srli a2, a1, 31
+; RV32-NEXT: andi a1, a1, 1
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: or a1, a2, a1
+; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: or a0, a1, a0
-; RV32-NEXT: sw a0, 4(s0)
-; RV32-NEXT: srli a0, a3, 31
-; RV32-NEXT: andi a3, a3, 1
-; RV32-NEXT: slli a3, a3, 1
-; RV32-NEXT: or a0, a0, a3
-; RV32-NEXT: slli a2, a2, 2
-; RV32-NEXT: or a0, a0, a2
; RV32-NEXT: sw a0, 8(s0)
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -460,64 +460,64 @@
; RV32M-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32M-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32M-NEXT: mv s0, a0
-; RV32M-NEXT: lw a0, 4(a0)
-; RV32M-NEXT: lb a1, 12(s0)
-; RV32M-NEXT: lw a2, 8(s0)
-; RV32M-NEXT: andi a3, a0, 1
-; RV32M-NEXT: neg s1, a3
-; RV32M-NEXT: slli a3, a1, 30
-; RV32M-NEXT: srli a4, a2, 2
-; RV32M-NEXT: or s2, a4, a3
-; RV32M-NEXT: slli a1, a1, 29
-; RV32M-NEXT: srli a1, a1, 31
-; RV32M-NEXT: neg s3, a1
-; RV32M-NEXT: slli a1, a2, 31
-; RV32M-NEXT: srli a0, a0, 1
-; RV32M-NEXT: or a0, a0, a1
-; RV32M-NEXT: lw s4, 0(s0)
-; RV32M-NEXT: slli a2, a2, 30
-; RV32M-NEXT: srli a2, a2, 31
-; RV32M-NEXT: neg a1, a2
-; RV32M-NEXT: li a2, 7
+; RV32M-NEXT: lb a0, 12(a0)
+; RV32M-NEXT: lw a1, 8(s0)
+; RV32M-NEXT: slli a2, a0, 30
+; RV32M-NEXT: lw a3, 4(s0)
+; RV32M-NEXT: srli s1, a1, 2
+; RV32M-NEXT: or s1, s1, a2
+; RV32M-NEXT: slli a2, a1, 31
+; RV32M-NEXT: srli a4, a3, 1
+; RV32M-NEXT: or s2, a4, a2
+; RV32M-NEXT: srli a0, a0, 2
+; RV32M-NEXT: slli a0, a0, 31
+; RV32M-NEXT: srai s3, a0, 31
+; RV32M-NEXT: srli a1, a1, 1
+; RV32M-NEXT: slli a1, a1, 31
+; RV32M-NEXT: lw a0, 0(s0)
+; RV32M-NEXT: srai s4, a1, 31
+; RV32M-NEXT: slli a1, a3, 31
+; RV32M-NEXT: srai a1, a1, 31
+; RV32M-NEXT: li a2, 6
; RV32M-NEXT: li a3, 0
; RV32M-NEXT: call __moddi3@plt
; RV32M-NEXT: mv s5, a0
; RV32M-NEXT: mv s6, a1
-; RV32M-NEXT: li a2, -5
-; RV32M-NEXT: li a3, -1
+; RV32M-NEXT: li a2, 7
; RV32M-NEXT: mv a0, s2
-; RV32M-NEXT: mv a1, s3
+; RV32M-NEXT: mv a1, s4
+; RV32M-NEXT: li a3, 0
; RV32M-NEXT: call __moddi3@plt
; RV32M-NEXT: mv s2, a0
-; RV32M-NEXT: mv s3, a1
-; RV32M-NEXT: li a2, 6
-; RV32M-NEXT: mv a0, s4
-; RV32M-NEXT: mv a1, s1
-; RV32M-NEXT: li a3, 0
+; RV32M-NEXT: mv s4, a1
+; RV32M-NEXT: li a2, -5
+; RV32M-NEXT: li a3, -1
+; RV32M-NEXT: mv a0, s1
+; RV32M-NEXT: mv a1, s3
; RV32M-NEXT: call __moddi3@plt
-; RV32M-NEXT: xori a2, s2, 2
-; RV32M-NEXT: or a2, a2, s3
-; RV32M-NEXT: seqz a2, a2
-; RV32M-NEXT: xori a3, s5, 1
-; RV32M-NEXT: or a3, a3, s6
-; RV32M-NEXT: seqz a3, a3
+; RV32M-NEXT: or a2, s5, s6
+; RV32M-NEXT: snez a2, a2
+; RV32M-NEXT: xori a0, a0, 2
; RV32M-NEXT: or a0, a0, a1
-; RV32M-NEXT: snez a0, a0
-; RV32M-NEXT: addi a3, a3, -1
-; RV32M-NEXT: addi a2, a2, -1
-; RV32M-NEXT: neg a1, a0
-; RV32M-NEXT: sw a1, 0(s0)
-; RV32M-NEXT: andi a1, a2, 7
-; RV32M-NEXT: sb a1, 12(s0)
-; RV32M-NEXT: slli a1, a3, 1
+; RV32M-NEXT: seqz a0, a0
+; RV32M-NEXT: xori a1, s2, 1
+; RV32M-NEXT: or a1, a1, s4
+; RV32M-NEXT: seqz a1, a1
+; RV32M-NEXT: neg a3, a2
+; RV32M-NEXT: addi a1, a1, -1
+; RV32M-NEXT: addi a0, a0, -1
+; RV32M-NEXT: sw a3, 0(s0)
+; RV32M-NEXT: andi a3, a0, 7
+; RV32M-NEXT: sb a3, 12(s0)
+; RV32M-NEXT: slli a3, a1, 1
+; RV32M-NEXT: or a2, a3, a2
+; RV32M-NEXT: sw a2, 4(s0)
+; RV32M-NEXT: srli a2, a1, 31
+; RV32M-NEXT: andi a1, a1, 1
+; RV32M-NEXT: slli a1, a1, 1
+; RV32M-NEXT: or a1, a2, a1
+; RV32M-NEXT: slli a0, a0, 2
; RV32M-NEXT: or a0, a1, a0
-; RV32M-NEXT: sw a0, 4(s0)
-; RV32M-NEXT: srli a0, a3, 31
-; RV32M-NEXT: andi a3, a3, 1
-; RV32M-NEXT: slli a3, a3, 1
-; RV32M-NEXT: or a0, a0, a3
-; RV32M-NEXT: slli a2, a2, 2
-; RV32M-NEXT: or a0, a0, a2
; RV32M-NEXT: sw a0, 8(s0)
; RV32M-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32M-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -610,22 +610,22 @@
; RV32MV-NEXT: mv s2, a0
; RV32MV-NEXT: lw a0, 8(a0)
; RV32MV-NEXT: lw a1, 4(s2)
-; RV32MV-NEXT: slli a2, a0, 31
-; RV32MV-NEXT: srli a3, a1, 1
-; RV32MV-NEXT: or s3, a3, a2
; RV32MV-NEXT: lb a2, 12(s2)
-; RV32MV-NEXT: slli a3, a0, 30
-; RV32MV-NEXT: srli a3, a3, 31
-; RV32MV-NEXT: neg s4, a3
+; RV32MV-NEXT: slli a3, a0, 31
+; RV32MV-NEXT: srli a4, a1, 1
+; RV32MV-NEXT: or s3, a4, a3
; RV32MV-NEXT: slli a3, a2, 30
-; RV32MV-NEXT: srli a0, a0, 2
-; RV32MV-NEXT: or s5, a0, a3
-; RV32MV-NEXT: slli a2, a2, 29
-; RV32MV-NEXT: srli a2, a2, 31
+; RV32MV-NEXT: srli a4, a0, 2
+; RV32MV-NEXT: or s4, a4, a3
+; RV32MV-NEXT: srli a0, a0, 1
+; RV32MV-NEXT: slli a0, a0, 31
+; RV32MV-NEXT: srai s5, a0, 31
+; RV32MV-NEXT: srli a2, a2, 2
+; RV32MV-NEXT: slli a2, a2, 31
; RV32MV-NEXT: lw a0, 0(s2)
-; RV32MV-NEXT: neg s6, a2
-; RV32MV-NEXT: andi a1, a1, 1
-; RV32MV-NEXT: neg a1, a1
+; RV32MV-NEXT: srai s6, a2, 31
+; RV32MV-NEXT: slli a1, a1, 31
+; RV32MV-NEXT: srai a1, a1, 31
; RV32MV-NEXT: li a2, 6
; RV32MV-NEXT: li a3, 0
; RV32MV-NEXT: call __moddi3@plt
@@ -633,14 +633,14 @@
; RV32MV-NEXT: sw a0, 0(sp)
; RV32MV-NEXT: li a2, -5
; RV32MV-NEXT: li a3, -1
-; RV32MV-NEXT: mv a0, s5
+; RV32MV-NEXT: mv a0, s4
; RV32MV-NEXT: mv a1, s6
; RV32MV-NEXT: call __moddi3@plt
; RV32MV-NEXT: sw a1, 20(sp)
; RV32MV-NEXT: sw a0, 16(sp)
; RV32MV-NEXT: li a2, 7
; RV32MV-NEXT: mv a0, s3
-; RV32MV-NEXT: mv a1, s4
+; RV32MV-NEXT: mv a1, s5
; RV32MV-NEXT: li a3, 0
; RV32MV-NEXT: call __moddi3@plt
; RV32MV-NEXT: sw a1, 12(sp)