diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -451,7 +451,7 @@
       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

-      setOperationAction(ISD::SELECT, VT, Expand);
+      setOperationAction(ISD::SELECT, VT, Custom);
       setOperationAction(ISD::SELECT_CC, VT, Expand);
       setOperationAction(ISD::VSELECT, VT, Expand);

@@ -538,7 +538,7 @@
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

-      setOperationAction(ISD::SELECT, VT, Expand);
+      setOperationAction(ISD::SELECT, VT, Custom);
       setOperationAction(ISD::SELECT_CC, VT, Expand);

       setOperationAction(ISD::STEP_VECTOR, VT, Custom);
@@ -598,7 +598,7 @@
       setOperationAction(ISD::MGATHER, VT, Custom);
       setOperationAction(ISD::MSCATTER, VT, Custom);

-      setOperationAction(ISD::SELECT, VT, Expand);
+      setOperationAction(ISD::SELECT, VT, Custom);
       setOperationAction(ISD::SELECT_CC, VT, Expand);

       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
@@ -668,6 +668,8 @@
         setOperationAction(ISD::SETCC, VT, Custom);

+        setOperationAction(ISD::SELECT, VT, Custom);
+
         setOperationAction(ISD::TRUNCATE, VT, Custom);

         setOperationAction(ISD::BITCAST, VT, Custom);
@@ -729,7 +731,6 @@
         setOperationAction(ISD::MULHU, VT, Custom);

         setOperationAction(ISD::VSELECT, VT, Custom);
-        setOperationAction(ISD::SELECT, VT, Expand);
         setOperationAction(ISD::SELECT_CC, VT, Expand);

         setOperationAction(ISD::ANY_EXTEND, VT, Custom);
@@ -794,7 +795,7 @@
           setCondCodeAction(CC, VT, Expand);

         setOperationAction(ISD::VSELECT, VT, Custom);
-        setOperationAction(ISD::SELECT, VT, Expand);
+        setOperationAction(ISD::SELECT, VT, Custom);
         setOperationAction(ISD::SELECT_CC, VT, Expand);

         setOperationAction(ISD::BITCAST, VT, Custom);
@@ -2736,19 +2737,29 @@
   SDValue TrueV = Op.getOperand(1);
   SDValue FalseV = Op.getOperand(2);
   SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
   MVT XLenVT = Subtarget.getXLenVT();

+  // Lower vector SELECTs to VSELECTs by splatting the condition.
+  if (VT.isVector()) {
+    MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
+    SDValue CondSplat = VT.isScalableVector()
+                            ? DAG.getSplatVector(SplatCondVT, DL, CondV)
+                            : DAG.getSplatBuildVector(SplatCondVT, DL, CondV);
+    return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
+  }
+
   // If the result type is XLenVT and CondV is the output of a SETCC node
   // which also operated on XLenVT inputs, then merge the SETCC node into the
   // lowered RISCVISD::SELECT_CC to take advantage of the integer
   // compare+branch instructions. i.e.:
   // (select (setcc lhs, rhs, cc), truev, falsev)
   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
-  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
+  if (VT == XLenVT && CondV.getOpcode() == ISD::SETCC &&
       CondV.getOperand(0).getSimpleValueType() == XLenVT) {
     SDValue LHS = CondV.getOperand(0);
     SDValue RHS = CondV.getOperand(1);
-    auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
+    const auto *CC = cast<CondCodeSDNode>(CondV.getOperand(2));
     ISD::CondCode CCVal = CC->get();

     // Special case for a select of 2 constants that have a difference of 1.
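For context (illustration only, not part of the patch): the lowering above handles a SELECT whose operands are vectors but whose condition is still a scalar i1 by splatting the condition into an i1 mask vector and emitting a VSELECT on that mask; the regenerated fixed-length tests below then select through vmerge.vvm under a mask built by vmv.v.x + vmsne.vi. A minimal sketch of the IR shape this covers and the codegen the updated checks expect, mirroring the select_v4f32 case further below (the function name @select_example is illustrative):

; Vector operands, scalar i1 condition.
define <4 x float> @select_example(i1 zeroext %c, <4 x float> %a, <4 x float> %b) {
  %v = select i1 %c, <4 x float> %a, <4 x float> %b
  ret <4 x float> %v
}
; With the RUN-line flags (-mattr=+experimental-v -riscv-v-vector-bits-min=128)
; this now compiles to a mask plus merge rather than the old per-element
; branch-and-insert sequence:
;   vsetivli   zero, 4, e8, mf4, ta, mu
;   vmv.v.x    v25, a0
;   vmsne.vi   v0, v25, 0
;   vsetvli    zero, zero, e32, m1, ta, mu
;   vmerge.vvm v8, v9, v8, v0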
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
@@ -1,30 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN:   -verify-machineinstrs < %s | FileCheck %s

 define <2 x half> @select_v2f16(i1 zeroext %c, <2 x half> %a, <2 x half> %b) {
 ; CHECK-LABEL: select_v2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    bnez a0, .LBB0_2
-; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v9
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    j .LBB0_3
-; CHECK-NEXT:  .LBB0_2:
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:  .LBB0_3:
-; CHECK-NEXT:    vfmv.f.s ft1, v25
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, ft1
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, ft0
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <2 x half> %a, <2 x half> %b
   ret <2 x half> %v
@@ -34,28 +21,11 @@
 ; CHECK-LABEL: selectcc_v2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    feq.h a0, fa0, fa1
-; CHECK-NEXT:    bnez a0, .LBB1_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v9, 1
-; CHECK-NEXT:    j .LBB1_3
-; CHECK-NEXT:  .LBB1_2:
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v8, 1
-; CHECK-NEXT:  .LBB1_3:
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmv.v.f v25, ft0
-; CHECK-NEXT:    bnez a0, .LBB1_5
-; CHECK-NEXT:  # %bb.4:
-; CHECK-NEXT:    vfmv.f.s ft0, v9
-; CHECK-NEXT:    j .LBB1_6
-; CHECK-NEXT:  .LBB1_5:
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:  .LBB1_6:
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmv.s.f v25, ft0
-; CHECK-NEXT:    vmv1r.v v8, v25
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq half %a, %b
   %v = select i1 %cmp, <2 x half> %c, <2 x half> %d
@@ -65,51 +35,11 @@
 define <4 x half> @select_v4f16(i1 zeroext %c, <4 x half> %a, <4 x half> %b) {
 ; CHECK-LABEL: select_v4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    bnez a0, .LBB2_3
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v9
-; CHECK-NEXT:    fsh ft0, 8(sp)
-; CHECK-NEXT:    beqz a0, .LBB2_4
-; CHECK-NEXT:  .LBB2_2:
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v8, 3
-; CHECK-NEXT: j .LBB2_5 -; CHECK-NEXT: .LBB2_3: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: fsh ft0, 8(sp) -; CHECK-NEXT: bnez a0, .LBB2_2 -; CHECK-NEXT: .LBB2_4: -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v9, 3 -; CHECK-NEXT: .LBB2_5: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 14(sp) -; CHECK-NEXT: bnez a0, .LBB2_7 -; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: vslidedown.vi v25, v9, 2 -; CHECK-NEXT: j .LBB2_8 -; CHECK-NEXT: .LBB2_7: -; CHECK-NEXT: vslidedown.vi v25, v8, 2 -; CHECK-NEXT: .LBB2_8: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 12(sp) -; CHECK-NEXT: bnez a0, .LBB2_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: j .LBB2_11 -; CHECK-NEXT: .LBB2_10: -; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: .LBB2_11: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 10(sp) -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, <4 x half> %a, <4 x half> %b ret <4 x half> %v @@ -118,52 +48,12 @@ define <4 x half> @selectcc_v4f16(half %a, half %b, <4 x half> %c, <4 x half> %d) { ; CHECK-LABEL: selectcc_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: feq.h a0, fa0, fa1 -; CHECK-NEXT: bnez a0, .LBB3_3 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfmv.f.s ft0, v9 -; CHECK-NEXT: fsh ft0, 8(sp) -; CHECK-NEXT: beqz a0, .LBB3_4 -; CHECK-NEXT: .LBB3_2: -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 -; CHECK-NEXT: j .LBB3_5 -; CHECK-NEXT: .LBB3_3: -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: fsh ft0, 8(sp) -; CHECK-NEXT: bnez a0, .LBB3_2 -; CHECK-NEXT: .LBB3_4: -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v9, 3 -; CHECK-NEXT: .LBB3_5: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 14(sp) -; CHECK-NEXT: bnez a0, .LBB3_7 -; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: vslidedown.vi v25, v9, 2 -; CHECK-NEXT: j .LBB3_8 -; CHECK-NEXT: .LBB3_7: -; CHECK-NEXT: vslidedown.vi v25, v8, 2 -; CHECK-NEXT: .LBB3_8: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 12(sp) -; CHECK-NEXT: bnez a0, .LBB3_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: j .LBB3_11 -; CHECK-NEXT: .LBB3_10: -; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: .LBB3_11: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 10(sp) -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = fcmp oeq half %a, %b %v = select i1 %cmp, <4 x half> %c, <4 x half> %d @@ -173,87 +63,11 @@ define <8 x half> @select_v8f16(i1 zeroext %c, <8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: select_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: bnez a0, .LBB4_3 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vsetvli zero, zero, 
e16, m1, ta, mu -; CHECK-NEXT: vfmv.f.s ft0, v9 -; CHECK-NEXT: fsh ft0, 16(sp) -; CHECK-NEXT: beqz a0, .LBB4_4 -; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v8, 7 -; CHECK-NEXT: j .LBB4_5 -; CHECK-NEXT: .LBB4_3: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: fsh ft0, 16(sp) -; CHECK-NEXT: bnez a0, .LBB4_2 -; CHECK-NEXT: .LBB4_4: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v9, 7 -; CHECK-NEXT: .LBB4_5: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 30(sp) -; CHECK-NEXT: bnez a0, .LBB4_7 -; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: vslidedown.vi v25, v9, 6 -; CHECK-NEXT: j .LBB4_8 -; CHECK-NEXT: .LBB4_7: -; CHECK-NEXT: vslidedown.vi v25, v8, 6 -; CHECK-NEXT: .LBB4_8: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 28(sp) -; CHECK-NEXT: bnez a0, .LBB4_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: vslidedown.vi v25, v9, 5 -; CHECK-NEXT: j .LBB4_11 -; CHECK-NEXT: .LBB4_10: -; CHECK-NEXT: vslidedown.vi v25, v8, 5 -; CHECK-NEXT: .LBB4_11: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 26(sp) -; CHECK-NEXT: bnez a0, .LBB4_13 -; CHECK-NEXT: # %bb.12: -; CHECK-NEXT: vslidedown.vi v25, v9, 4 -; CHECK-NEXT: j .LBB4_14 -; CHECK-NEXT: .LBB4_13: -; CHECK-NEXT: vslidedown.vi v25, v8, 4 -; CHECK-NEXT: .LBB4_14: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 24(sp) -; CHECK-NEXT: bnez a0, .LBB4_16 -; CHECK-NEXT: # %bb.15: -; CHECK-NEXT: vslidedown.vi v25, v9, 3 -; CHECK-NEXT: j .LBB4_17 -; CHECK-NEXT: .LBB4_16: -; CHECK-NEXT: vslidedown.vi v25, v8, 3 -; CHECK-NEXT: .LBB4_17: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 22(sp) -; CHECK-NEXT: bnez a0, .LBB4_19 -; CHECK-NEXT: # %bb.18: -; CHECK-NEXT: vslidedown.vi v25, v9, 2 -; CHECK-NEXT: j .LBB4_20 -; CHECK-NEXT: .LBB4_19: -; CHECK-NEXT: vslidedown.vi v25, v8, 2 -; CHECK-NEXT: .LBB4_20: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 20(sp) -; CHECK-NEXT: bnez a0, .LBB4_22 -; CHECK-NEXT: # %bb.21: -; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: j .LBB4_23 -; CHECK-NEXT: .LBB4_22: -; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: .LBB4_23: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 18(sp) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, <8 x half> %a, <8 x half> %b ret <8 x half> %v @@ -262,88 +76,12 @@ define <8 x half> @selectcc_v8f16(half %a, half %b, <8 x half> %c, <8 x half> %d) { ; CHECK-LABEL: selectcc_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: feq.h a0, fa0, fa1 -; CHECK-NEXT: bnez a0, .LBB5_3 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.f.s ft0, v9 -; CHECK-NEXT: fsh ft0, 16(sp) -; CHECK-NEXT: beqz a0, .LBB5_4 -; CHECK-NEXT: .LBB5_2: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v8, 7 -; CHECK-NEXT: j .LBB5_5 -; CHECK-NEXT: .LBB5_3: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: fsh ft0, 16(sp) -; CHECK-NEXT: bnez a0, .LBB5_2 -; 
CHECK-NEXT: .LBB5_4: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v9, 7 -; CHECK-NEXT: .LBB5_5: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 30(sp) -; CHECK-NEXT: bnez a0, .LBB5_7 -; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: vslidedown.vi v25, v9, 6 -; CHECK-NEXT: j .LBB5_8 -; CHECK-NEXT: .LBB5_7: -; CHECK-NEXT: vslidedown.vi v25, v8, 6 -; CHECK-NEXT: .LBB5_8: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 28(sp) -; CHECK-NEXT: bnez a0, .LBB5_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: vslidedown.vi v25, v9, 5 -; CHECK-NEXT: j .LBB5_11 -; CHECK-NEXT: .LBB5_10: -; CHECK-NEXT: vslidedown.vi v25, v8, 5 -; CHECK-NEXT: .LBB5_11: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 26(sp) -; CHECK-NEXT: bnez a0, .LBB5_13 -; CHECK-NEXT: # %bb.12: -; CHECK-NEXT: vslidedown.vi v25, v9, 4 -; CHECK-NEXT: j .LBB5_14 -; CHECK-NEXT: .LBB5_13: -; CHECK-NEXT: vslidedown.vi v25, v8, 4 -; CHECK-NEXT: .LBB5_14: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 24(sp) -; CHECK-NEXT: bnez a0, .LBB5_16 -; CHECK-NEXT: # %bb.15: -; CHECK-NEXT: vslidedown.vi v25, v9, 3 -; CHECK-NEXT: j .LBB5_17 -; CHECK-NEXT: .LBB5_16: -; CHECK-NEXT: vslidedown.vi v25, v8, 3 -; CHECK-NEXT: .LBB5_17: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 22(sp) -; CHECK-NEXT: bnez a0, .LBB5_19 -; CHECK-NEXT: # %bb.18: -; CHECK-NEXT: vslidedown.vi v25, v9, 2 -; CHECK-NEXT: j .LBB5_20 -; CHECK-NEXT: .LBB5_19: -; CHECK-NEXT: vslidedown.vi v25, v8, 2 -; CHECK-NEXT: .LBB5_20: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 20(sp) -; CHECK-NEXT: bnez a0, .LBB5_22 -; CHECK-NEXT: # %bb.21: -; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: j .LBB5_23 -; CHECK-NEXT: .LBB5_22: -; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: .LBB5_23: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsh ft0, 18(sp) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = fcmp oeq half %a, %b %v = select i1 %cmp, <8 x half> %c, <8 x half> %d @@ -351,679 +89,28 @@ } define <16 x half> @select_v16f16(i1 zeroext %c, <16 x half> %a, <16 x half> %b) { -; RV32-LABEL: select_v16f16: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -96 -; RV32-NEXT: .cfi_def_cfa_offset 96 -; RV32-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: addi s0, sp, 96 -; RV32-NEXT: .cfi_def_cfa s0, 0 -; RV32-NEXT: andi sp, sp, -32 -; RV32-NEXT: bnez a0, .LBB6_3 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v10 -; RV32-NEXT: fsh ft0, 32(sp) -; RV32-NEXT: beqz a0, .LBB6_4 -; RV32-NEXT: .LBB6_2: -; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, mu -; RV32-NEXT: vslidedown.vi v26, v8, 15 -; RV32-NEXT: j .LBB6_5 -; RV32-NEXT: .LBB6_3: -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fsh ft0, 32(sp) -; RV32-NEXT: bnez a0, .LBB6_2 -; RV32-NEXT: .LBB6_4: -; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, mu -; RV32-NEXT: vslidedown.vi v26, v10, 15 -; RV32-NEXT: .LBB6_5: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 62(sp) -; RV32-NEXT: bnez a0, .LBB6_7 -; RV32-NEXT: # %bb.6: -; RV32-NEXT: vslidedown.vi v26, v10, 14 -; RV32-NEXT: j .LBB6_8 -; RV32-NEXT: .LBB6_7: -; RV32-NEXT: vslidedown.vi v26, v8, 14 -; RV32-NEXT: .LBB6_8: -; RV32-NEXT: vfmv.f.s ft0, v26 
-; RV32-NEXT: fsh ft0, 60(sp) -; RV32-NEXT: bnez a0, .LBB6_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: vslidedown.vi v26, v10, 13 -; RV32-NEXT: j .LBB6_11 -; RV32-NEXT: .LBB6_10: -; RV32-NEXT: vslidedown.vi v26, v8, 13 -; RV32-NEXT: .LBB6_11: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 58(sp) -; RV32-NEXT: bnez a0, .LBB6_13 -; RV32-NEXT: # %bb.12: -; RV32-NEXT: vslidedown.vi v26, v10, 12 -; RV32-NEXT: j .LBB6_14 -; RV32-NEXT: .LBB6_13: -; RV32-NEXT: vslidedown.vi v26, v8, 12 -; RV32-NEXT: .LBB6_14: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 56(sp) -; RV32-NEXT: bnez a0, .LBB6_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: vslidedown.vi v26, v10, 11 -; RV32-NEXT: j .LBB6_17 -; RV32-NEXT: .LBB6_16: -; RV32-NEXT: vslidedown.vi v26, v8, 11 -; RV32-NEXT: .LBB6_17: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 54(sp) -; RV32-NEXT: bnez a0, .LBB6_19 -; RV32-NEXT: # %bb.18: -; RV32-NEXT: vslidedown.vi v26, v10, 10 -; RV32-NEXT: j .LBB6_20 -; RV32-NEXT: .LBB6_19: -; RV32-NEXT: vslidedown.vi v26, v8, 10 -; RV32-NEXT: .LBB6_20: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 52(sp) -; RV32-NEXT: bnez a0, .LBB6_22 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: vslidedown.vi v26, v10, 9 -; RV32-NEXT: j .LBB6_23 -; RV32-NEXT: .LBB6_22: -; RV32-NEXT: vslidedown.vi v26, v8, 9 -; RV32-NEXT: .LBB6_23: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 50(sp) -; RV32-NEXT: bnez a0, .LBB6_25 -; RV32-NEXT: # %bb.24: -; RV32-NEXT: vslidedown.vi v26, v10, 8 -; RV32-NEXT: j .LBB6_26 -; RV32-NEXT: .LBB6_25: -; RV32-NEXT: vslidedown.vi v26, v8, 8 -; RV32-NEXT: .LBB6_26: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 48(sp) -; RV32-NEXT: bnez a0, .LBB6_28 -; RV32-NEXT: # %bb.27: -; RV32-NEXT: vslidedown.vi v26, v10, 7 -; RV32-NEXT: j .LBB6_29 -; RV32-NEXT: .LBB6_28: -; RV32-NEXT: vslidedown.vi v26, v8, 7 -; RV32-NEXT: .LBB6_29: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 46(sp) -; RV32-NEXT: bnez a0, .LBB6_31 -; RV32-NEXT: # %bb.30: -; RV32-NEXT: vslidedown.vi v26, v10, 6 -; RV32-NEXT: j .LBB6_32 -; RV32-NEXT: .LBB6_31: -; RV32-NEXT: vslidedown.vi v26, v8, 6 -; RV32-NEXT: .LBB6_32: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 44(sp) -; RV32-NEXT: bnez a0, .LBB6_34 -; RV32-NEXT: # %bb.33: -; RV32-NEXT: vslidedown.vi v26, v10, 5 -; RV32-NEXT: j .LBB6_35 -; RV32-NEXT: .LBB6_34: -; RV32-NEXT: vslidedown.vi v26, v8, 5 -; RV32-NEXT: .LBB6_35: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 42(sp) -; RV32-NEXT: bnez a0, .LBB6_37 -; RV32-NEXT: # %bb.36: -; RV32-NEXT: vslidedown.vi v26, v10, 4 -; RV32-NEXT: j .LBB6_38 -; RV32-NEXT: .LBB6_37: -; RV32-NEXT: vslidedown.vi v26, v8, 4 -; RV32-NEXT: .LBB6_38: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 40(sp) -; RV32-NEXT: bnez a0, .LBB6_40 -; RV32-NEXT: # %bb.39: -; RV32-NEXT: vslidedown.vi v26, v10, 3 -; RV32-NEXT: j .LBB6_41 -; RV32-NEXT: .LBB6_40: -; RV32-NEXT: vslidedown.vi v26, v8, 3 -; RV32-NEXT: .LBB6_41: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 38(sp) -; RV32-NEXT: bnez a0, .LBB6_43 -; RV32-NEXT: # %bb.42: -; RV32-NEXT: vslidedown.vi v26, v10, 2 -; RV32-NEXT: j .LBB6_44 -; RV32-NEXT: .LBB6_43: -; RV32-NEXT: vslidedown.vi v26, v8, 2 -; RV32-NEXT: .LBB6_44: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 36(sp) -; RV32-NEXT: bnez a0, .LBB6_46 -; RV32-NEXT: # %bb.45: -; RV32-NEXT: vslidedown.vi v26, v10, 1 -; RV32-NEXT: j .LBB6_47 -; RV32-NEXT: .LBB6_46: -; RV32-NEXT: vslidedown.vi v26, v8, 1 -; RV32-NEXT: .LBB6_47: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 34(sp) -; 
RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; RV32-NEXT: addi a0, sp, 32 -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: addi sp, s0, -96 -; RV32-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 96 -; RV32-NEXT: ret -; -; RV64-LABEL: select_v16f16: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -96 -; RV64-NEXT: .cfi_def_cfa_offset 96 -; RV64-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: addi s0, sp, 96 -; RV64-NEXT: .cfi_def_cfa s0, 0 -; RV64-NEXT: andi sp, sp, -32 -; RV64-NEXT: bnez a0, .LBB6_3 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: fsh ft0, 32(sp) -; RV64-NEXT: beqz a0, .LBB6_4 -; RV64-NEXT: .LBB6_2: -; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, mu -; RV64-NEXT: vslidedown.vi v26, v8, 15 -; RV64-NEXT: j .LBB6_5 -; RV64-NEXT: .LBB6_3: -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: fsh ft0, 32(sp) -; RV64-NEXT: bnez a0, .LBB6_2 -; RV64-NEXT: .LBB6_4: -; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, mu -; RV64-NEXT: vslidedown.vi v26, v10, 15 -; RV64-NEXT: .LBB6_5: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 62(sp) -; RV64-NEXT: bnez a0, .LBB6_7 -; RV64-NEXT: # %bb.6: -; RV64-NEXT: vslidedown.vi v26, v10, 14 -; RV64-NEXT: j .LBB6_8 -; RV64-NEXT: .LBB6_7: -; RV64-NEXT: vslidedown.vi v26, v8, 14 -; RV64-NEXT: .LBB6_8: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 60(sp) -; RV64-NEXT: bnez a0, .LBB6_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: vslidedown.vi v26, v10, 13 -; RV64-NEXT: j .LBB6_11 -; RV64-NEXT: .LBB6_10: -; RV64-NEXT: vslidedown.vi v26, v8, 13 -; RV64-NEXT: .LBB6_11: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 58(sp) -; RV64-NEXT: bnez a0, .LBB6_13 -; RV64-NEXT: # %bb.12: -; RV64-NEXT: vslidedown.vi v26, v10, 12 -; RV64-NEXT: j .LBB6_14 -; RV64-NEXT: .LBB6_13: -; RV64-NEXT: vslidedown.vi v26, v8, 12 -; RV64-NEXT: .LBB6_14: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 56(sp) -; RV64-NEXT: bnez a0, .LBB6_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: vslidedown.vi v26, v10, 11 -; RV64-NEXT: j .LBB6_17 -; RV64-NEXT: .LBB6_16: -; RV64-NEXT: vslidedown.vi v26, v8, 11 -; RV64-NEXT: .LBB6_17: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 54(sp) -; RV64-NEXT: bnez a0, .LBB6_19 -; RV64-NEXT: # %bb.18: -; RV64-NEXT: vslidedown.vi v26, v10, 10 -; RV64-NEXT: j .LBB6_20 -; RV64-NEXT: .LBB6_19: -; RV64-NEXT: vslidedown.vi v26, v8, 10 -; RV64-NEXT: .LBB6_20: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 52(sp) -; RV64-NEXT: bnez a0, .LBB6_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: vslidedown.vi v26, v10, 9 -; RV64-NEXT: j .LBB6_23 -; RV64-NEXT: .LBB6_22: -; RV64-NEXT: vslidedown.vi v26, v8, 9 -; RV64-NEXT: .LBB6_23: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 50(sp) -; RV64-NEXT: bnez a0, .LBB6_25 -; RV64-NEXT: # %bb.24: -; RV64-NEXT: vslidedown.vi v26, v10, 8 -; RV64-NEXT: j .LBB6_26 -; RV64-NEXT: .LBB6_25: -; RV64-NEXT: vslidedown.vi v26, v8, 8 -; RV64-NEXT: .LBB6_26: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 48(sp) -; RV64-NEXT: bnez a0, .LBB6_28 -; RV64-NEXT: # %bb.27: -; RV64-NEXT: vslidedown.vi v26, v10, 7 -; RV64-NEXT: j .LBB6_29 -; RV64-NEXT: .LBB6_28: -; RV64-NEXT: vslidedown.vi v26, v8, 7 -; RV64-NEXT: .LBB6_29: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 46(sp) -; RV64-NEXT: bnez a0, 
.LBB6_31 -; RV64-NEXT: # %bb.30: -; RV64-NEXT: vslidedown.vi v26, v10, 6 -; RV64-NEXT: j .LBB6_32 -; RV64-NEXT: .LBB6_31: -; RV64-NEXT: vslidedown.vi v26, v8, 6 -; RV64-NEXT: .LBB6_32: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 44(sp) -; RV64-NEXT: bnez a0, .LBB6_34 -; RV64-NEXT: # %bb.33: -; RV64-NEXT: vslidedown.vi v26, v10, 5 -; RV64-NEXT: j .LBB6_35 -; RV64-NEXT: .LBB6_34: -; RV64-NEXT: vslidedown.vi v26, v8, 5 -; RV64-NEXT: .LBB6_35: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 42(sp) -; RV64-NEXT: bnez a0, .LBB6_37 -; RV64-NEXT: # %bb.36: -; RV64-NEXT: vslidedown.vi v26, v10, 4 -; RV64-NEXT: j .LBB6_38 -; RV64-NEXT: .LBB6_37: -; RV64-NEXT: vslidedown.vi v26, v8, 4 -; RV64-NEXT: .LBB6_38: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 40(sp) -; RV64-NEXT: bnez a0, .LBB6_40 -; RV64-NEXT: # %bb.39: -; RV64-NEXT: vslidedown.vi v26, v10, 3 -; RV64-NEXT: j .LBB6_41 -; RV64-NEXT: .LBB6_40: -; RV64-NEXT: vslidedown.vi v26, v8, 3 -; RV64-NEXT: .LBB6_41: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 38(sp) -; RV64-NEXT: bnez a0, .LBB6_43 -; RV64-NEXT: # %bb.42: -; RV64-NEXT: vslidedown.vi v26, v10, 2 -; RV64-NEXT: j .LBB6_44 -; RV64-NEXT: .LBB6_43: -; RV64-NEXT: vslidedown.vi v26, v8, 2 -; RV64-NEXT: .LBB6_44: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 36(sp) -; RV64-NEXT: bnez a0, .LBB6_46 -; RV64-NEXT: # %bb.45: -; RV64-NEXT: vslidedown.vi v26, v10, 1 -; RV64-NEXT: j .LBB6_47 -; RV64-NEXT: .LBB6_46: -; RV64-NEXT: vslidedown.vi v26, v8, 1 -; RV64-NEXT: .LBB6_47: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 34(sp) -; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: addi sp, s0, -96 -; RV64-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 96 -; RV64-NEXT: ret +; CHECK-LABEL: select_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: ret %v = select i1 %c, <16 x half> %a, <16 x half> %b ret <16 x half> %v } define <16 x half> @selectcc_v16f16(half %a, half %b, <16 x half> %c, <16 x half> %d) { -; RV32-LABEL: selectcc_v16f16: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -96 -; RV32-NEXT: .cfi_def_cfa_offset 96 -; RV32-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: addi s0, sp, 96 -; RV32-NEXT: .cfi_def_cfa s0, 0 -; RV32-NEXT: andi sp, sp, -32 -; RV32-NEXT: feq.h a0, fa0, fa1 -; RV32-NEXT: bnez a0, .LBB7_3 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v10 -; RV32-NEXT: fsh ft0, 32(sp) -; RV32-NEXT: beqz a0, .LBB7_4 -; RV32-NEXT: .LBB7_2: -; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, mu -; RV32-NEXT: vslidedown.vi v26, v8, 15 -; RV32-NEXT: j .LBB7_5 -; RV32-NEXT: .LBB7_3: -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fsh ft0, 32(sp) -; RV32-NEXT: bnez a0, .LBB7_2 -; RV32-NEXT: .LBB7_4: -; RV32-NEXT: vsetivli zero, 1, e16, m2, ta, mu -; RV32-NEXT: vslidedown.vi v26, v10, 15 -; RV32-NEXT: .LBB7_5: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 62(sp) -; RV32-NEXT: bnez a0, .LBB7_7 -; RV32-NEXT: # %bb.6: -; RV32-NEXT: vslidedown.vi v26, v10, 14 -; RV32-NEXT: j .LBB7_8 -; 
RV32-NEXT: .LBB7_7: -; RV32-NEXT: vslidedown.vi v26, v8, 14 -; RV32-NEXT: .LBB7_8: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 60(sp) -; RV32-NEXT: bnez a0, .LBB7_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: vslidedown.vi v26, v10, 13 -; RV32-NEXT: j .LBB7_11 -; RV32-NEXT: .LBB7_10: -; RV32-NEXT: vslidedown.vi v26, v8, 13 -; RV32-NEXT: .LBB7_11: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 58(sp) -; RV32-NEXT: bnez a0, .LBB7_13 -; RV32-NEXT: # %bb.12: -; RV32-NEXT: vslidedown.vi v26, v10, 12 -; RV32-NEXT: j .LBB7_14 -; RV32-NEXT: .LBB7_13: -; RV32-NEXT: vslidedown.vi v26, v8, 12 -; RV32-NEXT: .LBB7_14: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 56(sp) -; RV32-NEXT: bnez a0, .LBB7_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: vslidedown.vi v26, v10, 11 -; RV32-NEXT: j .LBB7_17 -; RV32-NEXT: .LBB7_16: -; RV32-NEXT: vslidedown.vi v26, v8, 11 -; RV32-NEXT: .LBB7_17: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 54(sp) -; RV32-NEXT: bnez a0, .LBB7_19 -; RV32-NEXT: # %bb.18: -; RV32-NEXT: vslidedown.vi v26, v10, 10 -; RV32-NEXT: j .LBB7_20 -; RV32-NEXT: .LBB7_19: -; RV32-NEXT: vslidedown.vi v26, v8, 10 -; RV32-NEXT: .LBB7_20: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 52(sp) -; RV32-NEXT: bnez a0, .LBB7_22 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: vslidedown.vi v26, v10, 9 -; RV32-NEXT: j .LBB7_23 -; RV32-NEXT: .LBB7_22: -; RV32-NEXT: vslidedown.vi v26, v8, 9 -; RV32-NEXT: .LBB7_23: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 50(sp) -; RV32-NEXT: bnez a0, .LBB7_25 -; RV32-NEXT: # %bb.24: -; RV32-NEXT: vslidedown.vi v26, v10, 8 -; RV32-NEXT: j .LBB7_26 -; RV32-NEXT: .LBB7_25: -; RV32-NEXT: vslidedown.vi v26, v8, 8 -; RV32-NEXT: .LBB7_26: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 48(sp) -; RV32-NEXT: bnez a0, .LBB7_28 -; RV32-NEXT: # %bb.27: -; RV32-NEXT: vslidedown.vi v26, v10, 7 -; RV32-NEXT: j .LBB7_29 -; RV32-NEXT: .LBB7_28: -; RV32-NEXT: vslidedown.vi v26, v8, 7 -; RV32-NEXT: .LBB7_29: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 46(sp) -; RV32-NEXT: bnez a0, .LBB7_31 -; RV32-NEXT: # %bb.30: -; RV32-NEXT: vslidedown.vi v26, v10, 6 -; RV32-NEXT: j .LBB7_32 -; RV32-NEXT: .LBB7_31: -; RV32-NEXT: vslidedown.vi v26, v8, 6 -; RV32-NEXT: .LBB7_32: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 44(sp) -; RV32-NEXT: bnez a0, .LBB7_34 -; RV32-NEXT: # %bb.33: -; RV32-NEXT: vslidedown.vi v26, v10, 5 -; RV32-NEXT: j .LBB7_35 -; RV32-NEXT: .LBB7_34: -; RV32-NEXT: vslidedown.vi v26, v8, 5 -; RV32-NEXT: .LBB7_35: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 42(sp) -; RV32-NEXT: bnez a0, .LBB7_37 -; RV32-NEXT: # %bb.36: -; RV32-NEXT: vslidedown.vi v26, v10, 4 -; RV32-NEXT: j .LBB7_38 -; RV32-NEXT: .LBB7_37: -; RV32-NEXT: vslidedown.vi v26, v8, 4 -; RV32-NEXT: .LBB7_38: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 40(sp) -; RV32-NEXT: bnez a0, .LBB7_40 -; RV32-NEXT: # %bb.39: -; RV32-NEXT: vslidedown.vi v26, v10, 3 -; RV32-NEXT: j .LBB7_41 -; RV32-NEXT: .LBB7_40: -; RV32-NEXT: vslidedown.vi v26, v8, 3 -; RV32-NEXT: .LBB7_41: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 38(sp) -; RV32-NEXT: bnez a0, .LBB7_43 -; RV32-NEXT: # %bb.42: -; RV32-NEXT: vslidedown.vi v26, v10, 2 -; RV32-NEXT: j .LBB7_44 -; RV32-NEXT: .LBB7_43: -; RV32-NEXT: vslidedown.vi v26, v8, 2 -; RV32-NEXT: .LBB7_44: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 36(sp) -; RV32-NEXT: bnez a0, .LBB7_46 -; RV32-NEXT: # %bb.45: -; RV32-NEXT: vslidedown.vi v26, v10, 1 -; RV32-NEXT: j .LBB7_47 -; RV32-NEXT: .LBB7_46: -; RV32-NEXT: 
vslidedown.vi v26, v8, 1 -; RV32-NEXT: .LBB7_47: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsh ft0, 34(sp) -; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; RV32-NEXT: addi a0, sp, 32 -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: addi sp, s0, -96 -; RV32-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 96 -; RV32-NEXT: ret -; -; RV64-LABEL: selectcc_v16f16: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -96 -; RV64-NEXT: .cfi_def_cfa_offset 96 -; RV64-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: addi s0, sp, 96 -; RV64-NEXT: .cfi_def_cfa s0, 0 -; RV64-NEXT: andi sp, sp, -32 -; RV64-NEXT: feq.h a0, fa0, fa1 -; RV64-NEXT: bnez a0, .LBB7_3 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: fsh ft0, 32(sp) -; RV64-NEXT: beqz a0, .LBB7_4 -; RV64-NEXT: .LBB7_2: -; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, mu -; RV64-NEXT: vslidedown.vi v26, v8, 15 -; RV64-NEXT: j .LBB7_5 -; RV64-NEXT: .LBB7_3: -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: fsh ft0, 32(sp) -; RV64-NEXT: bnez a0, .LBB7_2 -; RV64-NEXT: .LBB7_4: -; RV64-NEXT: vsetivli zero, 1, e16, m2, ta, mu -; RV64-NEXT: vslidedown.vi v26, v10, 15 -; RV64-NEXT: .LBB7_5: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 62(sp) -; RV64-NEXT: bnez a0, .LBB7_7 -; RV64-NEXT: # %bb.6: -; RV64-NEXT: vslidedown.vi v26, v10, 14 -; RV64-NEXT: j .LBB7_8 -; RV64-NEXT: .LBB7_7: -; RV64-NEXT: vslidedown.vi v26, v8, 14 -; RV64-NEXT: .LBB7_8: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 60(sp) -; RV64-NEXT: bnez a0, .LBB7_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: vslidedown.vi v26, v10, 13 -; RV64-NEXT: j .LBB7_11 -; RV64-NEXT: .LBB7_10: -; RV64-NEXT: vslidedown.vi v26, v8, 13 -; RV64-NEXT: .LBB7_11: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 58(sp) -; RV64-NEXT: bnez a0, .LBB7_13 -; RV64-NEXT: # %bb.12: -; RV64-NEXT: vslidedown.vi v26, v10, 12 -; RV64-NEXT: j .LBB7_14 -; RV64-NEXT: .LBB7_13: -; RV64-NEXT: vslidedown.vi v26, v8, 12 -; RV64-NEXT: .LBB7_14: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 56(sp) -; RV64-NEXT: bnez a0, .LBB7_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: vslidedown.vi v26, v10, 11 -; RV64-NEXT: j .LBB7_17 -; RV64-NEXT: .LBB7_16: -; RV64-NEXT: vslidedown.vi v26, v8, 11 -; RV64-NEXT: .LBB7_17: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 54(sp) -; RV64-NEXT: bnez a0, .LBB7_19 -; RV64-NEXT: # %bb.18: -; RV64-NEXT: vslidedown.vi v26, v10, 10 -; RV64-NEXT: j .LBB7_20 -; RV64-NEXT: .LBB7_19: -; RV64-NEXT: vslidedown.vi v26, v8, 10 -; RV64-NEXT: .LBB7_20: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 52(sp) -; RV64-NEXT: bnez a0, .LBB7_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: vslidedown.vi v26, v10, 9 -; RV64-NEXT: j .LBB7_23 -; RV64-NEXT: .LBB7_22: -; RV64-NEXT: vslidedown.vi v26, v8, 9 -; RV64-NEXT: .LBB7_23: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 50(sp) -; RV64-NEXT: bnez a0, .LBB7_25 -; RV64-NEXT: # %bb.24: -; RV64-NEXT: vslidedown.vi v26, v10, 8 -; RV64-NEXT: j .LBB7_26 -; RV64-NEXT: .LBB7_25: -; RV64-NEXT: vslidedown.vi v26, v8, 8 -; RV64-NEXT: .LBB7_26: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 48(sp) -; RV64-NEXT: bnez a0, .LBB7_28 -; RV64-NEXT: # %bb.27: -; RV64-NEXT: vslidedown.vi v26, v10, 7 -; RV64-NEXT: j .LBB7_29 -; RV64-NEXT: .LBB7_28: -; 
RV64-NEXT: vslidedown.vi v26, v8, 7 -; RV64-NEXT: .LBB7_29: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 46(sp) -; RV64-NEXT: bnez a0, .LBB7_31 -; RV64-NEXT: # %bb.30: -; RV64-NEXT: vslidedown.vi v26, v10, 6 -; RV64-NEXT: j .LBB7_32 -; RV64-NEXT: .LBB7_31: -; RV64-NEXT: vslidedown.vi v26, v8, 6 -; RV64-NEXT: .LBB7_32: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 44(sp) -; RV64-NEXT: bnez a0, .LBB7_34 -; RV64-NEXT: # %bb.33: -; RV64-NEXT: vslidedown.vi v26, v10, 5 -; RV64-NEXT: j .LBB7_35 -; RV64-NEXT: .LBB7_34: -; RV64-NEXT: vslidedown.vi v26, v8, 5 -; RV64-NEXT: .LBB7_35: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 42(sp) -; RV64-NEXT: bnez a0, .LBB7_37 -; RV64-NEXT: # %bb.36: -; RV64-NEXT: vslidedown.vi v26, v10, 4 -; RV64-NEXT: j .LBB7_38 -; RV64-NEXT: .LBB7_37: -; RV64-NEXT: vslidedown.vi v26, v8, 4 -; RV64-NEXT: .LBB7_38: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 40(sp) -; RV64-NEXT: bnez a0, .LBB7_40 -; RV64-NEXT: # %bb.39: -; RV64-NEXT: vslidedown.vi v26, v10, 3 -; RV64-NEXT: j .LBB7_41 -; RV64-NEXT: .LBB7_40: -; RV64-NEXT: vslidedown.vi v26, v8, 3 -; RV64-NEXT: .LBB7_41: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 38(sp) -; RV64-NEXT: bnez a0, .LBB7_43 -; RV64-NEXT: # %bb.42: -; RV64-NEXT: vslidedown.vi v26, v10, 2 -; RV64-NEXT: j .LBB7_44 -; RV64-NEXT: .LBB7_43: -; RV64-NEXT: vslidedown.vi v26, v8, 2 -; RV64-NEXT: .LBB7_44: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 36(sp) -; RV64-NEXT: bnez a0, .LBB7_46 -; RV64-NEXT: # %bb.45: -; RV64-NEXT: vslidedown.vi v26, v10, 1 -; RV64-NEXT: j .LBB7_47 -; RV64-NEXT: .LBB7_46: -; RV64-NEXT: vslidedown.vi v26, v8, 1 -; RV64-NEXT: .LBB7_47: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsh ft0, 34(sp) -; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: addi sp, s0, -96 -; RV64-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 96 -; RV64-NEXT: ret +; CHECK-LABEL: selectcc_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.h a0, fa0, fa1 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: ret %cmp = fcmp oeq half %a, %b %v = select i1 %cmp, <16 x half> %c, <16 x half> %d ret <16 x half> %v @@ -1032,24 +119,11 @@ define <2 x float> @select_v2f32(i1 zeroext %c, <2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: select_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: bnez a0, .LBB8_2 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfmv.f.s ft0, v9 -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: j .LBB8_3 -; CHECK-NEXT: .LBB8_2: -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: .LBB8_3: -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; CHECK-NEXT: vfmv.v.f v8, ft1 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, <2 x float> %a, <2 x float> %b ret <2 x float> %v @@ -1059,28 +133,11 @@ ; CHECK-LABEL: 
selectcc_v2f32: ; CHECK: # %bb.0: ; CHECK-NEXT: feq.s a0, fa0, fa1 -; CHECK-NEXT: bnez a0, .LBB9_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: j .LBB9_3 -; CHECK-NEXT: .LBB9_2: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: .LBB9_3: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; CHECK-NEXT: vfmv.v.f v25, ft0 -; CHECK-NEXT: bnez a0, .LBB9_5 -; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: vfmv.f.s ft0, v9 -; CHECK-NEXT: j .LBB9_6 -; CHECK-NEXT: .LBB9_5: -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: .LBB9_6: -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; CHECK-NEXT: vfmv.s.f v25, ft0 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = fcmp oeq float %a, %b %v = select i1 %cmp, <2 x float> %c, <2 x float> %d @@ -1090,51 +147,11 @@ define <4 x float> @select_v4f32(i1 zeroext %c, <4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: select_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: bnez a0, .LBB10_3 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.f.s ft0, v9 -; CHECK-NEXT: fsw ft0, 16(sp) -; CHECK-NEXT: beqz a0, .LBB10_4 -; CHECK-NEXT: .LBB10_2: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 -; CHECK-NEXT: j .LBB10_5 -; CHECK-NEXT: .LBB10_3: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: fsw ft0, 16(sp) -; CHECK-NEXT: bnez a0, .LBB10_2 -; CHECK-NEXT: .LBB10_4: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v9, 3 -; CHECK-NEXT: .LBB10_5: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsw ft0, 28(sp) -; CHECK-NEXT: bnez a0, .LBB10_7 -; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: vslidedown.vi v25, v9, 2 -; CHECK-NEXT: j .LBB10_8 -; CHECK-NEXT: .LBB10_7: -; CHECK-NEXT: vslidedown.vi v25, v8, 2 -; CHECK-NEXT: .LBB10_8: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsw ft0, 24(sp) -; CHECK-NEXT: bnez a0, .LBB10_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: j .LBB10_11 -; CHECK-NEXT: .LBB10_10: -; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: .LBB10_11: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsw ft0, 20(sp) -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, <4 x float> %a, <4 x float> %b ret <4 x float> %v @@ -1143,52 +160,12 @@ define <4 x float> @selectcc_v4f32(float %a, float %b, <4 x float> %c, <4 x float> %d) { ; CHECK-LABEL: selectcc_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: feq.s a0, fa0, fa1 -; CHECK-NEXT: bnez a0, .LBB11_3 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.f.s ft0, v9 -; CHECK-NEXT: fsw ft0, 16(sp) -; 
CHECK-NEXT: beqz a0, .LBB11_4 -; CHECK-NEXT: .LBB11_2: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v8, 3 -; CHECK-NEXT: j .LBB11_5 -; CHECK-NEXT: .LBB11_3: -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: fsw ft0, 16(sp) -; CHECK-NEXT: bnez a0, .LBB11_2 -; CHECK-NEXT: .LBB11_4: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v9, 3 -; CHECK-NEXT: .LBB11_5: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsw ft0, 28(sp) -; CHECK-NEXT: bnez a0, .LBB11_7 -; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: vslidedown.vi v25, v9, 2 -; CHECK-NEXT: j .LBB11_8 -; CHECK-NEXT: .LBB11_7: -; CHECK-NEXT: vslidedown.vi v25, v8, 2 -; CHECK-NEXT: .LBB11_8: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsw ft0, 24(sp) -; CHECK-NEXT: bnez a0, .LBB11_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: j .LBB11_11 -; CHECK-NEXT: .LBB11_10: -; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: .LBB11_11: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: fsw ft0, 20(sp) -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = fcmp oeq float %a, %b %v = select i1 %cmp, <4 x float> %c, <4 x float> %d @@ -1196,1070 +173,56 @@ } define <8 x float> @select_v8f32(i1 zeroext %c, <8 x float> %a, <8 x float> %b) { -; RV32-LABEL: select_v8f32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -96 -; RV32-NEXT: .cfi_def_cfa_offset 96 -; RV32-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: addi s0, sp, 96 -; RV32-NEXT: .cfi_def_cfa s0, 0 -; RV32-NEXT: andi sp, sp, -32 -; RV32-NEXT: bnez a0, .LBB12_3 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v10 -; RV32-NEXT: fsw ft0, 32(sp) -; RV32-NEXT: beqz a0, .LBB12_4 -; RV32-NEXT: .LBB12_2: -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: vslidedown.vi v26, v8, 7 -; RV32-NEXT: j .LBB12_5 -; RV32-NEXT: .LBB12_3: -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fsw ft0, 32(sp) -; RV32-NEXT: bnez a0, .LBB12_2 -; RV32-NEXT: .LBB12_4: -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: vslidedown.vi v26, v10, 7 -; RV32-NEXT: .LBB12_5: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsw ft0, 60(sp) -; RV32-NEXT: bnez a0, .LBB12_7 -; RV32-NEXT: # %bb.6: -; RV32-NEXT: vslidedown.vi v26, v10, 6 -; RV32-NEXT: j .LBB12_8 -; RV32-NEXT: .LBB12_7: -; RV32-NEXT: vslidedown.vi v26, v8, 6 -; RV32-NEXT: .LBB12_8: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsw ft0, 56(sp) -; RV32-NEXT: bnez a0, .LBB12_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: vslidedown.vi v26, v10, 5 -; RV32-NEXT: j .LBB12_11 -; RV32-NEXT: .LBB12_10: -; RV32-NEXT: vslidedown.vi v26, v8, 5 -; RV32-NEXT: .LBB12_11: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsw ft0, 52(sp) -; RV32-NEXT: bnez a0, .LBB12_13 -; RV32-NEXT: # %bb.12: -; RV32-NEXT: vslidedown.vi v26, v10, 4 -; RV32-NEXT: j .LBB12_14 -; RV32-NEXT: .LBB12_13: -; RV32-NEXT: vslidedown.vi v26, v8, 4 -; RV32-NEXT: .LBB12_14: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsw ft0, 48(sp) -; RV32-NEXT: bnez a0, .LBB12_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: vslidedown.vi v26, v10, 3 -; RV32-NEXT: j .LBB12_17 -; RV32-NEXT: .LBB12_16: -; RV32-NEXT: 
vslidedown.vi v26, v8, 3 -; RV32-NEXT: .LBB12_17: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsw ft0, 44(sp) -; RV32-NEXT: bnez a0, .LBB12_19 -; RV32-NEXT: # %bb.18: -; RV32-NEXT: vslidedown.vi v26, v10, 2 -; RV32-NEXT: j .LBB12_20 -; RV32-NEXT: .LBB12_19: -; RV32-NEXT: vslidedown.vi v26, v8, 2 -; RV32-NEXT: .LBB12_20: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsw ft0, 40(sp) -; RV32-NEXT: bnez a0, .LBB12_22 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: vslidedown.vi v26, v10, 1 -; RV32-NEXT: j .LBB12_23 -; RV32-NEXT: .LBB12_22: -; RV32-NEXT: vslidedown.vi v26, v8, 1 -; RV32-NEXT: .LBB12_23: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsw ft0, 36(sp) -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: addi a0, sp, 32 -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: addi sp, s0, -96 -; RV32-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 96 -; RV32-NEXT: ret -; -; RV64-LABEL: select_v8f32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -96 -; RV64-NEXT: .cfi_def_cfa_offset 96 -; RV64-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: addi s0, sp, 96 -; RV64-NEXT: .cfi_def_cfa s0, 0 -; RV64-NEXT: andi sp, sp, -32 -; RV64-NEXT: bnez a0, .LBB12_3 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: fsw ft0, 32(sp) -; RV64-NEXT: beqz a0, .LBB12_4 -; RV64-NEXT: .LBB12_2: -; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV64-NEXT: vslidedown.vi v26, v8, 7 -; RV64-NEXT: j .LBB12_5 -; RV64-NEXT: .LBB12_3: -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: fsw ft0, 32(sp) -; RV64-NEXT: bnez a0, .LBB12_2 -; RV64-NEXT: .LBB12_4: -; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV64-NEXT: vslidedown.vi v26, v10, 7 -; RV64-NEXT: .LBB12_5: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsw ft0, 60(sp) -; RV64-NEXT: bnez a0, .LBB12_7 -; RV64-NEXT: # %bb.6: -; RV64-NEXT: vslidedown.vi v26, v10, 6 -; RV64-NEXT: j .LBB12_8 -; RV64-NEXT: .LBB12_7: -; RV64-NEXT: vslidedown.vi v26, v8, 6 -; RV64-NEXT: .LBB12_8: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsw ft0, 56(sp) -; RV64-NEXT: bnez a0, .LBB12_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: vslidedown.vi v26, v10, 5 -; RV64-NEXT: j .LBB12_11 -; RV64-NEXT: .LBB12_10: -; RV64-NEXT: vslidedown.vi v26, v8, 5 -; RV64-NEXT: .LBB12_11: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsw ft0, 52(sp) -; RV64-NEXT: bnez a0, .LBB12_13 -; RV64-NEXT: # %bb.12: -; RV64-NEXT: vslidedown.vi v26, v10, 4 -; RV64-NEXT: j .LBB12_14 -; RV64-NEXT: .LBB12_13: -; RV64-NEXT: vslidedown.vi v26, v8, 4 -; RV64-NEXT: .LBB12_14: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsw ft0, 48(sp) -; RV64-NEXT: bnez a0, .LBB12_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: vslidedown.vi v26, v10, 3 -; RV64-NEXT: j .LBB12_17 -; RV64-NEXT: .LBB12_16: -; RV64-NEXT: vslidedown.vi v26, v8, 3 -; RV64-NEXT: .LBB12_17: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsw ft0, 44(sp) -; RV64-NEXT: bnez a0, .LBB12_19 -; RV64-NEXT: # %bb.18: -; RV64-NEXT: vslidedown.vi v26, v10, 2 -; RV64-NEXT: j .LBB12_20 -; RV64-NEXT: .LBB12_19: -; RV64-NEXT: vslidedown.vi v26, v8, 2 -; RV64-NEXT: .LBB12_20: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsw ft0, 40(sp) -; RV64-NEXT: bnez a0, .LBB12_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: vslidedown.vi v26, v10, 1 -; RV64-NEXT: j .LBB12_23 -; RV64-NEXT: .LBB12_22: -; RV64-NEXT: 
vslidedown.vi v26, v8, 1 -; RV64-NEXT: .LBB12_23: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsw ft0, 36(sp) -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: addi sp, s0, -96 -; RV64-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 96 -; RV64-NEXT: ret +; CHECK-LABEL: select_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: ret %v = select i1 %c, <8 x float> %a, <8 x float> %b ret <8 x float> %v } define <8 x float> @selectcc_v8f32(float %a, float %b, <8 x float> %c, <8 x float> %d) { -; RV32-LABEL: selectcc_v8f32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -96 -; RV32-NEXT: .cfi_def_cfa_offset 96 -; RV32-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: addi s0, sp, 96 -; RV32-NEXT: .cfi_def_cfa s0, 0 -; RV32-NEXT: andi sp, sp, -32 -; RV32-NEXT: feq.s a0, fa0, fa1 -; RV32-NEXT: bnez a0, .LBB13_3 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v10 -; RV32-NEXT: fsw ft0, 32(sp) -; RV32-NEXT: beqz a0, .LBB13_4 -; RV32-NEXT: .LBB13_2: -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: vslidedown.vi v26, v8, 7 -; RV32-NEXT: j .LBB13_5 -; RV32-NEXT: .LBB13_3: -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fsw ft0, 32(sp) -; RV32-NEXT: bnez a0, .LBB13_2 -; RV32-NEXT: .LBB13_4: -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV32-NEXT: vslidedown.vi v26, v10, 7 -; RV32-NEXT: .LBB13_5: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsw ft0, 60(sp) -; RV32-NEXT: bnez a0, .LBB13_7 -; RV32-NEXT: # %bb.6: -; RV32-NEXT: vslidedown.vi v26, v10, 6 -; RV32-NEXT: j .LBB13_8 -; RV32-NEXT: .LBB13_7: -; RV32-NEXT: vslidedown.vi v26, v8, 6 -; RV32-NEXT: .LBB13_8: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsw ft0, 56(sp) -; RV32-NEXT: bnez a0, .LBB13_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: vslidedown.vi v26, v10, 5 -; RV32-NEXT: j .LBB13_11 -; RV32-NEXT: .LBB13_10: -; RV32-NEXT: vslidedown.vi v26, v8, 5 -; RV32-NEXT: .LBB13_11: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsw ft0, 52(sp) -; RV32-NEXT: bnez a0, .LBB13_13 -; RV32-NEXT: # %bb.12: -; RV32-NEXT: vslidedown.vi v26, v10, 4 -; RV32-NEXT: j .LBB13_14 -; RV32-NEXT: .LBB13_13: -; RV32-NEXT: vslidedown.vi v26, v8, 4 -; RV32-NEXT: .LBB13_14: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsw ft0, 48(sp) -; RV32-NEXT: bnez a0, .LBB13_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: vslidedown.vi v26, v10, 3 -; RV32-NEXT: j .LBB13_17 -; RV32-NEXT: .LBB13_16: -; RV32-NEXT: vslidedown.vi v26, v8, 3 -; RV32-NEXT: .LBB13_17: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsw ft0, 44(sp) -; RV32-NEXT: bnez a0, .LBB13_19 -; RV32-NEXT: # %bb.18: -; RV32-NEXT: vslidedown.vi v26, v10, 2 -; RV32-NEXT: j .LBB13_20 -; RV32-NEXT: .LBB13_19: -; RV32-NEXT: vslidedown.vi v26, v8, 2 -; RV32-NEXT: .LBB13_20: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsw ft0, 40(sp) -; RV32-NEXT: bnez a0, .LBB13_22 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: vslidedown.vi v26, v10, 1 -; RV32-NEXT: j .LBB13_23 -; RV32-NEXT: .LBB13_22: -; RV32-NEXT: vslidedown.vi v26, v8, 1 -; RV32-NEXT: .LBB13_23: -; RV32-NEXT: vfmv.f.s ft0, 
v26 -; RV32-NEXT: fsw ft0, 36(sp) -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: addi a0, sp, 32 -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: addi sp, s0, -96 -; RV32-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 96 -; RV32-NEXT: ret -; -; RV64-LABEL: selectcc_v8f32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -96 -; RV64-NEXT: .cfi_def_cfa_offset 96 -; RV64-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: addi s0, sp, 96 -; RV64-NEXT: .cfi_def_cfa s0, 0 -; RV64-NEXT: andi sp, sp, -32 -; RV64-NEXT: feq.s a0, fa0, fa1 -; RV64-NEXT: bnez a0, .LBB13_3 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: fsw ft0, 32(sp) -; RV64-NEXT: beqz a0, .LBB13_4 -; RV64-NEXT: .LBB13_2: -; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV64-NEXT: vslidedown.vi v26, v8, 7 -; RV64-NEXT: j .LBB13_5 -; RV64-NEXT: .LBB13_3: -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: fsw ft0, 32(sp) -; RV64-NEXT: bnez a0, .LBB13_2 -; RV64-NEXT: .LBB13_4: -; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; RV64-NEXT: vslidedown.vi v26, v10, 7 -; RV64-NEXT: .LBB13_5: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsw ft0, 60(sp) -; RV64-NEXT: bnez a0, .LBB13_7 -; RV64-NEXT: # %bb.6: -; RV64-NEXT: vslidedown.vi v26, v10, 6 -; RV64-NEXT: j .LBB13_8 -; RV64-NEXT: .LBB13_7: -; RV64-NEXT: vslidedown.vi v26, v8, 6 -; RV64-NEXT: .LBB13_8: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsw ft0, 56(sp) -; RV64-NEXT: bnez a0, .LBB13_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: vslidedown.vi v26, v10, 5 -; RV64-NEXT: j .LBB13_11 -; RV64-NEXT: .LBB13_10: -; RV64-NEXT: vslidedown.vi v26, v8, 5 -; RV64-NEXT: .LBB13_11: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsw ft0, 52(sp) -; RV64-NEXT: bnez a0, .LBB13_13 -; RV64-NEXT: # %bb.12: -; RV64-NEXT: vslidedown.vi v26, v10, 4 -; RV64-NEXT: j .LBB13_14 -; RV64-NEXT: .LBB13_13: -; RV64-NEXT: vslidedown.vi v26, v8, 4 -; RV64-NEXT: .LBB13_14: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsw ft0, 48(sp) -; RV64-NEXT: bnez a0, .LBB13_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: vslidedown.vi v26, v10, 3 -; RV64-NEXT: j .LBB13_17 -; RV64-NEXT: .LBB13_16: -; RV64-NEXT: vslidedown.vi v26, v8, 3 -; RV64-NEXT: .LBB13_17: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsw ft0, 44(sp) -; RV64-NEXT: bnez a0, .LBB13_19 -; RV64-NEXT: # %bb.18: -; RV64-NEXT: vslidedown.vi v26, v10, 2 -; RV64-NEXT: j .LBB13_20 -; RV64-NEXT: .LBB13_19: -; RV64-NEXT: vslidedown.vi v26, v8, 2 -; RV64-NEXT: .LBB13_20: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsw ft0, 40(sp) -; RV64-NEXT: bnez a0, .LBB13_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: vslidedown.vi v26, v10, 1 -; RV64-NEXT: j .LBB13_23 -; RV64-NEXT: .LBB13_22: -; RV64-NEXT: vslidedown.vi v26, v8, 1 -; RV64-NEXT: .LBB13_23: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsw ft0, 36(sp) -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: addi sp, s0, -96 -; RV64-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 96 -; RV64-NEXT: ret +; CHECK-LABEL: selectcc_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.s a0, fa0, fa1 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi 
v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: ret %cmp = fcmp oeq float %a, %b %v = select i1 %cmp, <8 x float> %c, <8 x float> %d ret <8 x float> %v } define <16 x float> @select_v16f32(i1 zeroext %c, <16 x float> %a, <16 x float> %b) { -; RV32-LABEL: select_v16f32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -192 -; RV32-NEXT: .cfi_def_cfa_offset 192 -; RV32-NEXT: sw ra, 188(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 184(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: addi s0, sp, 192 -; RV32-NEXT: .cfi_def_cfa s0, 0 -; RV32-NEXT: andi sp, sp, -64 -; RV32-NEXT: bnez a0, .LBB14_3 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v12 -; RV32-NEXT: fsw ft0, 64(sp) -; RV32-NEXT: beqz a0, .LBB14_4 -; RV32-NEXT: .LBB14_2: -; RV32-NEXT: vsetivli zero, 1, e32, m4, ta, mu -; RV32-NEXT: vslidedown.vi v28, v8, 15 -; RV32-NEXT: j .LBB14_5 -; RV32-NEXT: .LBB14_3: -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fsw ft0, 64(sp) -; RV32-NEXT: bnez a0, .LBB14_2 -; RV32-NEXT: .LBB14_4: -; RV32-NEXT: vsetivli zero, 1, e32, m4, ta, mu -; RV32-NEXT: vslidedown.vi v28, v12, 15 -; RV32-NEXT: .LBB14_5: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 124(sp) -; RV32-NEXT: bnez a0, .LBB14_7 -; RV32-NEXT: # %bb.6: -; RV32-NEXT: vslidedown.vi v28, v12, 14 -; RV32-NEXT: j .LBB14_8 -; RV32-NEXT: .LBB14_7: -; RV32-NEXT: vslidedown.vi v28, v8, 14 -; RV32-NEXT: .LBB14_8: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 120(sp) -; RV32-NEXT: bnez a0, .LBB14_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: vslidedown.vi v28, v12, 13 -; RV32-NEXT: j .LBB14_11 -; RV32-NEXT: .LBB14_10: -; RV32-NEXT: vslidedown.vi v28, v8, 13 -; RV32-NEXT: .LBB14_11: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 116(sp) -; RV32-NEXT: bnez a0, .LBB14_13 -; RV32-NEXT: # %bb.12: -; RV32-NEXT: vslidedown.vi v28, v12, 12 -; RV32-NEXT: j .LBB14_14 -; RV32-NEXT: .LBB14_13: -; RV32-NEXT: vslidedown.vi v28, v8, 12 -; RV32-NEXT: .LBB14_14: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 112(sp) -; RV32-NEXT: bnez a0, .LBB14_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: vslidedown.vi v28, v12, 11 -; RV32-NEXT: j .LBB14_17 -; RV32-NEXT: .LBB14_16: -; RV32-NEXT: vslidedown.vi v28, v8, 11 -; RV32-NEXT: .LBB14_17: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 108(sp) -; RV32-NEXT: bnez a0, .LBB14_19 -; RV32-NEXT: # %bb.18: -; RV32-NEXT: vslidedown.vi v28, v12, 10 -; RV32-NEXT: j .LBB14_20 -; RV32-NEXT: .LBB14_19: -; RV32-NEXT: vslidedown.vi v28, v8, 10 -; RV32-NEXT: .LBB14_20: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 104(sp) -; RV32-NEXT: bnez a0, .LBB14_22 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: vslidedown.vi v28, v12, 9 -; RV32-NEXT: j .LBB14_23 -; RV32-NEXT: .LBB14_22: -; RV32-NEXT: vslidedown.vi v28, v8, 9 -; RV32-NEXT: .LBB14_23: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 100(sp) -; RV32-NEXT: bnez a0, .LBB14_25 -; RV32-NEXT: # %bb.24: -; RV32-NEXT: vslidedown.vi v28, v12, 8 -; RV32-NEXT: j .LBB14_26 -; RV32-NEXT: .LBB14_25: -; RV32-NEXT: vslidedown.vi v28, v8, 8 -; RV32-NEXT: .LBB14_26: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 96(sp) -; RV32-NEXT: bnez a0, .LBB14_28 -; RV32-NEXT: # %bb.27: -; RV32-NEXT: vslidedown.vi v28, v12, 7 -; RV32-NEXT: j .LBB14_29 -; RV32-NEXT: .LBB14_28: -; RV32-NEXT: vslidedown.vi v28, v8, 7 -; RV32-NEXT: .LBB14_29: -; RV32-NEXT: 
vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 92(sp) -; RV32-NEXT: bnez a0, .LBB14_31 -; RV32-NEXT: # %bb.30: -; RV32-NEXT: vslidedown.vi v28, v12, 6 -; RV32-NEXT: j .LBB14_32 -; RV32-NEXT: .LBB14_31: -; RV32-NEXT: vslidedown.vi v28, v8, 6 -; RV32-NEXT: .LBB14_32: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 88(sp) -; RV32-NEXT: bnez a0, .LBB14_34 -; RV32-NEXT: # %bb.33: -; RV32-NEXT: vslidedown.vi v28, v12, 5 -; RV32-NEXT: j .LBB14_35 -; RV32-NEXT: .LBB14_34: -; RV32-NEXT: vslidedown.vi v28, v8, 5 -; RV32-NEXT: .LBB14_35: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 84(sp) -; RV32-NEXT: bnez a0, .LBB14_37 -; RV32-NEXT: # %bb.36: -; RV32-NEXT: vslidedown.vi v28, v12, 4 -; RV32-NEXT: j .LBB14_38 -; RV32-NEXT: .LBB14_37: -; RV32-NEXT: vslidedown.vi v28, v8, 4 -; RV32-NEXT: .LBB14_38: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 80(sp) -; RV32-NEXT: bnez a0, .LBB14_40 -; RV32-NEXT: # %bb.39: -; RV32-NEXT: vslidedown.vi v28, v12, 3 -; RV32-NEXT: j .LBB14_41 -; RV32-NEXT: .LBB14_40: -; RV32-NEXT: vslidedown.vi v28, v8, 3 -; RV32-NEXT: .LBB14_41: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 76(sp) -; RV32-NEXT: bnez a0, .LBB14_43 -; RV32-NEXT: # %bb.42: -; RV32-NEXT: vslidedown.vi v28, v12, 2 -; RV32-NEXT: j .LBB14_44 -; RV32-NEXT: .LBB14_43: -; RV32-NEXT: vslidedown.vi v28, v8, 2 -; RV32-NEXT: .LBB14_44: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 72(sp) -; RV32-NEXT: bnez a0, .LBB14_46 -; RV32-NEXT: # %bb.45: -; RV32-NEXT: vslidedown.vi v28, v12, 1 -; RV32-NEXT: j .LBB14_47 -; RV32-NEXT: .LBB14_46: -; RV32-NEXT: vslidedown.vi v28, v8, 1 -; RV32-NEXT: .LBB14_47: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 68(sp) -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: addi a0, sp, 64 -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: addi sp, s0, -192 -; RV32-NEXT: lw s0, 184(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 188(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 192 -; RV32-NEXT: ret -; -; RV64-LABEL: select_v16f32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -192 -; RV64-NEXT: .cfi_def_cfa_offset 192 -; RV64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: addi s0, sp, 192 -; RV64-NEXT: .cfi_def_cfa s0, 0 -; RV64-NEXT: andi sp, sp, -64 -; RV64-NEXT: bnez a0, .LBB14_3 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v12 -; RV64-NEXT: fsw ft0, 64(sp) -; RV64-NEXT: beqz a0, .LBB14_4 -; RV64-NEXT: .LBB14_2: -; RV64-NEXT: vsetivli zero, 1, e32, m4, ta, mu -; RV64-NEXT: vslidedown.vi v28, v8, 15 -; RV64-NEXT: j .LBB14_5 -; RV64-NEXT: .LBB14_3: -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: fsw ft0, 64(sp) -; RV64-NEXT: bnez a0, .LBB14_2 -; RV64-NEXT: .LBB14_4: -; RV64-NEXT: vsetivli zero, 1, e32, m4, ta, mu -; RV64-NEXT: vslidedown.vi v28, v12, 15 -; RV64-NEXT: .LBB14_5: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 124(sp) -; RV64-NEXT: bnez a0, .LBB14_7 -; RV64-NEXT: # %bb.6: -; RV64-NEXT: vslidedown.vi v28, v12, 14 -; RV64-NEXT: j .LBB14_8 -; RV64-NEXT: .LBB14_7: -; RV64-NEXT: vslidedown.vi v28, v8, 14 -; RV64-NEXT: .LBB14_8: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 120(sp) -; RV64-NEXT: bnez a0, .LBB14_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: vslidedown.vi v28, v12, 13 -; RV64-NEXT: j .LBB14_11 -; RV64-NEXT: .LBB14_10: -; RV64-NEXT: vslidedown.vi v28, v8, 13 -; RV64-NEXT: 
.LBB14_11: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 116(sp) -; RV64-NEXT: bnez a0, .LBB14_13 -; RV64-NEXT: # %bb.12: -; RV64-NEXT: vslidedown.vi v28, v12, 12 -; RV64-NEXT: j .LBB14_14 -; RV64-NEXT: .LBB14_13: -; RV64-NEXT: vslidedown.vi v28, v8, 12 -; RV64-NEXT: .LBB14_14: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 112(sp) -; RV64-NEXT: bnez a0, .LBB14_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: vslidedown.vi v28, v12, 11 -; RV64-NEXT: j .LBB14_17 -; RV64-NEXT: .LBB14_16: -; RV64-NEXT: vslidedown.vi v28, v8, 11 -; RV64-NEXT: .LBB14_17: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 108(sp) -; RV64-NEXT: bnez a0, .LBB14_19 -; RV64-NEXT: # %bb.18: -; RV64-NEXT: vslidedown.vi v28, v12, 10 -; RV64-NEXT: j .LBB14_20 -; RV64-NEXT: .LBB14_19: -; RV64-NEXT: vslidedown.vi v28, v8, 10 -; RV64-NEXT: .LBB14_20: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 104(sp) -; RV64-NEXT: bnez a0, .LBB14_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: vslidedown.vi v28, v12, 9 -; RV64-NEXT: j .LBB14_23 -; RV64-NEXT: .LBB14_22: -; RV64-NEXT: vslidedown.vi v28, v8, 9 -; RV64-NEXT: .LBB14_23: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 100(sp) -; RV64-NEXT: bnez a0, .LBB14_25 -; RV64-NEXT: # %bb.24: -; RV64-NEXT: vslidedown.vi v28, v12, 8 -; RV64-NEXT: j .LBB14_26 -; RV64-NEXT: .LBB14_25: -; RV64-NEXT: vslidedown.vi v28, v8, 8 -; RV64-NEXT: .LBB14_26: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 96(sp) -; RV64-NEXT: bnez a0, .LBB14_28 -; RV64-NEXT: # %bb.27: -; RV64-NEXT: vslidedown.vi v28, v12, 7 -; RV64-NEXT: j .LBB14_29 -; RV64-NEXT: .LBB14_28: -; RV64-NEXT: vslidedown.vi v28, v8, 7 -; RV64-NEXT: .LBB14_29: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 92(sp) -; RV64-NEXT: bnez a0, .LBB14_31 -; RV64-NEXT: # %bb.30: -; RV64-NEXT: vslidedown.vi v28, v12, 6 -; RV64-NEXT: j .LBB14_32 -; RV64-NEXT: .LBB14_31: -; RV64-NEXT: vslidedown.vi v28, v8, 6 -; RV64-NEXT: .LBB14_32: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 88(sp) -; RV64-NEXT: bnez a0, .LBB14_34 -; RV64-NEXT: # %bb.33: -; RV64-NEXT: vslidedown.vi v28, v12, 5 -; RV64-NEXT: j .LBB14_35 -; RV64-NEXT: .LBB14_34: -; RV64-NEXT: vslidedown.vi v28, v8, 5 -; RV64-NEXT: .LBB14_35: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 84(sp) -; RV64-NEXT: bnez a0, .LBB14_37 -; RV64-NEXT: # %bb.36: -; RV64-NEXT: vslidedown.vi v28, v12, 4 -; RV64-NEXT: j .LBB14_38 -; RV64-NEXT: .LBB14_37: -; RV64-NEXT: vslidedown.vi v28, v8, 4 -; RV64-NEXT: .LBB14_38: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 80(sp) -; RV64-NEXT: bnez a0, .LBB14_40 -; RV64-NEXT: # %bb.39: -; RV64-NEXT: vslidedown.vi v28, v12, 3 -; RV64-NEXT: j .LBB14_41 -; RV64-NEXT: .LBB14_40: -; RV64-NEXT: vslidedown.vi v28, v8, 3 -; RV64-NEXT: .LBB14_41: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 76(sp) -; RV64-NEXT: bnez a0, .LBB14_43 -; RV64-NEXT: # %bb.42: -; RV64-NEXT: vslidedown.vi v28, v12, 2 -; RV64-NEXT: j .LBB14_44 -; RV64-NEXT: .LBB14_43: -; RV64-NEXT: vslidedown.vi v28, v8, 2 -; RV64-NEXT: .LBB14_44: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 72(sp) -; RV64-NEXT: bnez a0, .LBB14_46 -; RV64-NEXT: # %bb.45: -; RV64-NEXT: vslidedown.vi v28, v12, 1 -; RV64-NEXT: j .LBB14_47 -; RV64-NEXT: .LBB14_46: -; RV64-NEXT: vslidedown.vi v28, v8, 1 -; RV64-NEXT: .LBB14_47: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 68(sp) -; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV64-NEXT: addi a0, sp, 64 -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: addi sp, s0, -192 -; RV64-NEXT: ld s0, 176(sp) # 8-byte 
Folded Reload -; RV64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 192 -; RV64-NEXT: ret +; CHECK-LABEL: select_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: ret %v = select i1 %c, <16 x float> %a, <16 x float> %b ret <16 x float> %v } define <16 x float> @selectcc_v16f32(float %a, float %b, <16 x float> %c, <16 x float> %d) { -; RV32-LABEL: selectcc_v16f32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -192 -; RV32-NEXT: .cfi_def_cfa_offset 192 -; RV32-NEXT: sw ra, 188(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 184(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: addi s0, sp, 192 -; RV32-NEXT: .cfi_def_cfa s0, 0 -; RV32-NEXT: andi sp, sp, -64 -; RV32-NEXT: feq.s a0, fa0, fa1 -; RV32-NEXT: bnez a0, .LBB15_3 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v12 -; RV32-NEXT: fsw ft0, 64(sp) -; RV32-NEXT: beqz a0, .LBB15_4 -; RV32-NEXT: .LBB15_2: -; RV32-NEXT: vsetivli zero, 1, e32, m4, ta, mu -; RV32-NEXT: vslidedown.vi v28, v8, 15 -; RV32-NEXT: j .LBB15_5 -; RV32-NEXT: .LBB15_3: -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fsw ft0, 64(sp) -; RV32-NEXT: bnez a0, .LBB15_2 -; RV32-NEXT: .LBB15_4: -; RV32-NEXT: vsetivli zero, 1, e32, m4, ta, mu -; RV32-NEXT: vslidedown.vi v28, v12, 15 -; RV32-NEXT: .LBB15_5: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 124(sp) -; RV32-NEXT: bnez a0, .LBB15_7 -; RV32-NEXT: # %bb.6: -; RV32-NEXT: vslidedown.vi v28, v12, 14 -; RV32-NEXT: j .LBB15_8 -; RV32-NEXT: .LBB15_7: -; RV32-NEXT: vslidedown.vi v28, v8, 14 -; RV32-NEXT: .LBB15_8: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 120(sp) -; RV32-NEXT: bnez a0, .LBB15_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: vslidedown.vi v28, v12, 13 -; RV32-NEXT: j .LBB15_11 -; RV32-NEXT: .LBB15_10: -; RV32-NEXT: vslidedown.vi v28, v8, 13 -; RV32-NEXT: .LBB15_11: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 116(sp) -; RV32-NEXT: bnez a0, .LBB15_13 -; RV32-NEXT: # %bb.12: -; RV32-NEXT: vslidedown.vi v28, v12, 12 -; RV32-NEXT: j .LBB15_14 -; RV32-NEXT: .LBB15_13: -; RV32-NEXT: vslidedown.vi v28, v8, 12 -; RV32-NEXT: .LBB15_14: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 112(sp) -; RV32-NEXT: bnez a0, .LBB15_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: vslidedown.vi v28, v12, 11 -; RV32-NEXT: j .LBB15_17 -; RV32-NEXT: .LBB15_16: -; RV32-NEXT: vslidedown.vi v28, v8, 11 -; RV32-NEXT: .LBB15_17: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 108(sp) -; RV32-NEXT: bnez a0, .LBB15_19 -; RV32-NEXT: # %bb.18: -; RV32-NEXT: vslidedown.vi v28, v12, 10 -; RV32-NEXT: j .LBB15_20 -; RV32-NEXT: .LBB15_19: -; RV32-NEXT: vslidedown.vi v28, v8, 10 -; RV32-NEXT: .LBB15_20: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 104(sp) -; RV32-NEXT: bnez a0, .LBB15_22 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: vslidedown.vi v28, v12, 9 -; RV32-NEXT: j .LBB15_23 -; RV32-NEXT: .LBB15_22: -; RV32-NEXT: vslidedown.vi v28, v8, 9 -; RV32-NEXT: .LBB15_23: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 100(sp) -; RV32-NEXT: bnez a0, .LBB15_25 -; RV32-NEXT: # %bb.24: -; RV32-NEXT: vslidedown.vi v28, v12, 8 -; RV32-NEXT: j .LBB15_26 -; RV32-NEXT: .LBB15_25: -; RV32-NEXT: vslidedown.vi v28, v8, 8 -; RV32-NEXT: .LBB15_26: -; 
RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 96(sp) -; RV32-NEXT: bnez a0, .LBB15_28 -; RV32-NEXT: # %bb.27: -; RV32-NEXT: vslidedown.vi v28, v12, 7 -; RV32-NEXT: j .LBB15_29 -; RV32-NEXT: .LBB15_28: -; RV32-NEXT: vslidedown.vi v28, v8, 7 -; RV32-NEXT: .LBB15_29: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 92(sp) -; RV32-NEXT: bnez a0, .LBB15_31 -; RV32-NEXT: # %bb.30: -; RV32-NEXT: vslidedown.vi v28, v12, 6 -; RV32-NEXT: j .LBB15_32 -; RV32-NEXT: .LBB15_31: -; RV32-NEXT: vslidedown.vi v28, v8, 6 -; RV32-NEXT: .LBB15_32: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 88(sp) -; RV32-NEXT: bnez a0, .LBB15_34 -; RV32-NEXT: # %bb.33: -; RV32-NEXT: vslidedown.vi v28, v12, 5 -; RV32-NEXT: j .LBB15_35 -; RV32-NEXT: .LBB15_34: -; RV32-NEXT: vslidedown.vi v28, v8, 5 -; RV32-NEXT: .LBB15_35: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 84(sp) -; RV32-NEXT: bnez a0, .LBB15_37 -; RV32-NEXT: # %bb.36: -; RV32-NEXT: vslidedown.vi v28, v12, 4 -; RV32-NEXT: j .LBB15_38 -; RV32-NEXT: .LBB15_37: -; RV32-NEXT: vslidedown.vi v28, v8, 4 -; RV32-NEXT: .LBB15_38: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 80(sp) -; RV32-NEXT: bnez a0, .LBB15_40 -; RV32-NEXT: # %bb.39: -; RV32-NEXT: vslidedown.vi v28, v12, 3 -; RV32-NEXT: j .LBB15_41 -; RV32-NEXT: .LBB15_40: -; RV32-NEXT: vslidedown.vi v28, v8, 3 -; RV32-NEXT: .LBB15_41: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 76(sp) -; RV32-NEXT: bnez a0, .LBB15_43 -; RV32-NEXT: # %bb.42: -; RV32-NEXT: vslidedown.vi v28, v12, 2 -; RV32-NEXT: j .LBB15_44 -; RV32-NEXT: .LBB15_43: -; RV32-NEXT: vslidedown.vi v28, v8, 2 -; RV32-NEXT: .LBB15_44: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 72(sp) -; RV32-NEXT: bnez a0, .LBB15_46 -; RV32-NEXT: # %bb.45: -; RV32-NEXT: vslidedown.vi v28, v12, 1 -; RV32-NEXT: j .LBB15_47 -; RV32-NEXT: .LBB15_46: -; RV32-NEXT: vslidedown.vi v28, v8, 1 -; RV32-NEXT: .LBB15_47: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsw ft0, 68(sp) -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: addi a0, sp, 64 -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: addi sp, s0, -192 -; RV32-NEXT: lw s0, 184(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 188(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 192 -; RV32-NEXT: ret -; -; RV64-LABEL: selectcc_v16f32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -192 -; RV64-NEXT: .cfi_def_cfa_offset 192 -; RV64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: addi s0, sp, 192 -; RV64-NEXT: .cfi_def_cfa s0, 0 -; RV64-NEXT: andi sp, sp, -64 -; RV64-NEXT: feq.s a0, fa0, fa1 -; RV64-NEXT: bnez a0, .LBB15_3 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v12 -; RV64-NEXT: fsw ft0, 64(sp) -; RV64-NEXT: beqz a0, .LBB15_4 -; RV64-NEXT: .LBB15_2: -; RV64-NEXT: vsetivli zero, 1, e32, m4, ta, mu -; RV64-NEXT: vslidedown.vi v28, v8, 15 -; RV64-NEXT: j .LBB15_5 -; RV64-NEXT: .LBB15_3: -; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: fsw ft0, 64(sp) -; RV64-NEXT: bnez a0, .LBB15_2 -; RV64-NEXT: .LBB15_4: -; RV64-NEXT: vsetivli zero, 1, e32, m4, ta, mu -; RV64-NEXT: vslidedown.vi v28, v12, 15 -; RV64-NEXT: .LBB15_5: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 124(sp) -; RV64-NEXT: bnez a0, .LBB15_7 -; RV64-NEXT: # %bb.6: -; RV64-NEXT: vslidedown.vi v28, v12, 14 -; RV64-NEXT: j .LBB15_8 -; RV64-NEXT: .LBB15_7: -; RV64-NEXT: 
vslidedown.vi v28, v8, 14 -; RV64-NEXT: .LBB15_8: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 120(sp) -; RV64-NEXT: bnez a0, .LBB15_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: vslidedown.vi v28, v12, 13 -; RV64-NEXT: j .LBB15_11 -; RV64-NEXT: .LBB15_10: -; RV64-NEXT: vslidedown.vi v28, v8, 13 -; RV64-NEXT: .LBB15_11: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 116(sp) -; RV64-NEXT: bnez a0, .LBB15_13 -; RV64-NEXT: # %bb.12: -; RV64-NEXT: vslidedown.vi v28, v12, 12 -; RV64-NEXT: j .LBB15_14 -; RV64-NEXT: .LBB15_13: -; RV64-NEXT: vslidedown.vi v28, v8, 12 -; RV64-NEXT: .LBB15_14: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 112(sp) -; RV64-NEXT: bnez a0, .LBB15_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: vslidedown.vi v28, v12, 11 -; RV64-NEXT: j .LBB15_17 -; RV64-NEXT: .LBB15_16: -; RV64-NEXT: vslidedown.vi v28, v8, 11 -; RV64-NEXT: .LBB15_17: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 108(sp) -; RV64-NEXT: bnez a0, .LBB15_19 -; RV64-NEXT: # %bb.18: -; RV64-NEXT: vslidedown.vi v28, v12, 10 -; RV64-NEXT: j .LBB15_20 -; RV64-NEXT: .LBB15_19: -; RV64-NEXT: vslidedown.vi v28, v8, 10 -; RV64-NEXT: .LBB15_20: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 104(sp) -; RV64-NEXT: bnez a0, .LBB15_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: vslidedown.vi v28, v12, 9 -; RV64-NEXT: j .LBB15_23 -; RV64-NEXT: .LBB15_22: -; RV64-NEXT: vslidedown.vi v28, v8, 9 -; RV64-NEXT: .LBB15_23: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 100(sp) -; RV64-NEXT: bnez a0, .LBB15_25 -; RV64-NEXT: # %bb.24: -; RV64-NEXT: vslidedown.vi v28, v12, 8 -; RV64-NEXT: j .LBB15_26 -; RV64-NEXT: .LBB15_25: -; RV64-NEXT: vslidedown.vi v28, v8, 8 -; RV64-NEXT: .LBB15_26: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 96(sp) -; RV64-NEXT: bnez a0, .LBB15_28 -; RV64-NEXT: # %bb.27: -; RV64-NEXT: vslidedown.vi v28, v12, 7 -; RV64-NEXT: j .LBB15_29 -; RV64-NEXT: .LBB15_28: -; RV64-NEXT: vslidedown.vi v28, v8, 7 -; RV64-NEXT: .LBB15_29: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 92(sp) -; RV64-NEXT: bnez a0, .LBB15_31 -; RV64-NEXT: # %bb.30: -; RV64-NEXT: vslidedown.vi v28, v12, 6 -; RV64-NEXT: j .LBB15_32 -; RV64-NEXT: .LBB15_31: -; RV64-NEXT: vslidedown.vi v28, v8, 6 -; RV64-NEXT: .LBB15_32: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 88(sp) -; RV64-NEXT: bnez a0, .LBB15_34 -; RV64-NEXT: # %bb.33: -; RV64-NEXT: vslidedown.vi v28, v12, 5 -; RV64-NEXT: j .LBB15_35 -; RV64-NEXT: .LBB15_34: -; RV64-NEXT: vslidedown.vi v28, v8, 5 -; RV64-NEXT: .LBB15_35: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 84(sp) -; RV64-NEXT: bnez a0, .LBB15_37 -; RV64-NEXT: # %bb.36: -; RV64-NEXT: vslidedown.vi v28, v12, 4 -; RV64-NEXT: j .LBB15_38 -; RV64-NEXT: .LBB15_37: -; RV64-NEXT: vslidedown.vi v28, v8, 4 -; RV64-NEXT: .LBB15_38: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 80(sp) -; RV64-NEXT: bnez a0, .LBB15_40 -; RV64-NEXT: # %bb.39: -; RV64-NEXT: vslidedown.vi v28, v12, 3 -; RV64-NEXT: j .LBB15_41 -; RV64-NEXT: .LBB15_40: -; RV64-NEXT: vslidedown.vi v28, v8, 3 -; RV64-NEXT: .LBB15_41: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 76(sp) -; RV64-NEXT: bnez a0, .LBB15_43 -; RV64-NEXT: # %bb.42: -; RV64-NEXT: vslidedown.vi v28, v12, 2 -; RV64-NEXT: j .LBB15_44 -; RV64-NEXT: .LBB15_43: -; RV64-NEXT: vslidedown.vi v28, v8, 2 -; RV64-NEXT: .LBB15_44: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 72(sp) -; RV64-NEXT: bnez a0, .LBB15_46 -; RV64-NEXT: # %bb.45: -; RV64-NEXT: vslidedown.vi v28, v12, 1 -; RV64-NEXT: j .LBB15_47 -; RV64-NEXT: 
.LBB15_46: -; RV64-NEXT: vslidedown.vi v28, v8, 1 -; RV64-NEXT: .LBB15_47: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsw ft0, 68(sp) -; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV64-NEXT: addi a0, sp, 64 -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: addi sp, s0, -192 -; RV64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 192 -; RV64-NEXT: ret +; CHECK-LABEL: selectcc_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.s a0, fa0, fa1 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: ret %cmp = fcmp oeq float %a, %b %v = select i1 %cmp, <16 x float> %c, <16 x float> %d ret <16 x float> %v @@ -2268,24 +231,11 @@ define <2 x double> @select_v2f64(i1 zeroext %c, <2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: select_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: bnez a0, .LBB16_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.f.s ft0, v9 -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: j .LBB16_3 -; CHECK-NEXT: .LBB16_2: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: .LBB16_3: -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v8, ft1 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, <2 x double> %a, <2 x double> %b ret <2 x double> %v @@ -2295,28 +245,11 @@ ; CHECK-LABEL: selectcc_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: feq.d a0, fa0, fa1 -; CHECK-NEXT: bnez a0, .LBB17_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: j .LBB17_3 -; CHECK-NEXT: .LBB17_2: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: .LBB17_3: -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v25, ft0 -; CHECK-NEXT: bnez a0, .LBB17_5 -; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: vfmv.f.s ft0, v9 -; CHECK-NEXT: j .LBB17_6 -; CHECK-NEXT: .LBB17_5: -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: .LBB17_6: -; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; CHECK-NEXT: vfmv.s.f v25, ft0 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = fcmp oeq double %a, %b %v = select i1 %cmp, <2 x double> %c, <2 x double> %d @@ -2324,1317 +257,84 @@ } define <4 x double> @select_v4f64(i1 zeroext %c, <4 x double> %a, <4 x double> %b) { -; RV32-LABEL: select_v4f64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -96 -; RV32-NEXT: .cfi_def_cfa_offset 96 -; RV32-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: addi s0, sp, 96 -; RV32-NEXT: .cfi_def_cfa s0, 0 -; RV32-NEXT: andi sp, 
sp, -32 -; RV32-NEXT: bnez a0, .LBB18_3 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v10 -; RV32-NEXT: fsd ft0, 32(sp) -; RV32-NEXT: beqz a0, .LBB18_4 -; RV32-NEXT: .LBB18_2: -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, mu -; RV32-NEXT: vslidedown.vi v26, v8, 3 -; RV32-NEXT: j .LBB18_5 -; RV32-NEXT: .LBB18_3: -; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fsd ft0, 32(sp) -; RV32-NEXT: bnez a0, .LBB18_2 -; RV32-NEXT: .LBB18_4: -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, mu -; RV32-NEXT: vslidedown.vi v26, v10, 3 -; RV32-NEXT: .LBB18_5: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsd ft0, 56(sp) -; RV32-NEXT: bnez a0, .LBB18_7 -; RV32-NEXT: # %bb.6: -; RV32-NEXT: vslidedown.vi v26, v10, 2 -; RV32-NEXT: j .LBB18_8 -; RV32-NEXT: .LBB18_7: -; RV32-NEXT: vslidedown.vi v26, v8, 2 -; RV32-NEXT: .LBB18_8: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsd ft0, 48(sp) -; RV32-NEXT: bnez a0, .LBB18_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: vslidedown.vi v26, v10, 1 -; RV32-NEXT: j .LBB18_11 -; RV32-NEXT: .LBB18_10: -; RV32-NEXT: vslidedown.vi v26, v8, 1 -; RV32-NEXT: .LBB18_11: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsd ft0, 40(sp) -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; RV32-NEXT: addi a0, sp, 32 -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi sp, s0, -96 -; RV32-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 96 -; RV32-NEXT: ret -; -; RV64-LABEL: select_v4f64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -96 -; RV64-NEXT: .cfi_def_cfa_offset 96 -; RV64-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: addi s0, sp, 96 -; RV64-NEXT: .cfi_def_cfa s0, 0 -; RV64-NEXT: andi sp, sp, -32 -; RV64-NEXT: bnez a0, .LBB18_3 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: fsd ft0, 32(sp) -; RV64-NEXT: beqz a0, .LBB18_4 -; RV64-NEXT: .LBB18_2: -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, mu -; RV64-NEXT: vslidedown.vi v26, v8, 3 -; RV64-NEXT: j .LBB18_5 -; RV64-NEXT: .LBB18_3: -; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: fsd ft0, 32(sp) -; RV64-NEXT: bnez a0, .LBB18_2 -; RV64-NEXT: .LBB18_4: -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, mu -; RV64-NEXT: vslidedown.vi v26, v10, 3 -; RV64-NEXT: .LBB18_5: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsd ft0, 56(sp) -; RV64-NEXT: bnez a0, .LBB18_7 -; RV64-NEXT: # %bb.6: -; RV64-NEXT: vslidedown.vi v26, v10, 2 -; RV64-NEXT: j .LBB18_8 -; RV64-NEXT: .LBB18_7: -; RV64-NEXT: vslidedown.vi v26, v8, 2 -; RV64-NEXT: .LBB18_8: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsd ft0, 48(sp) -; RV64-NEXT: bnez a0, .LBB18_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: vslidedown.vi v26, v10, 1 -; RV64-NEXT: j .LBB18_11 -; RV64-NEXT: .LBB18_10: -; RV64-NEXT: vslidedown.vi v26, v8, 1 -; RV64-NEXT: .LBB18_11: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsd ft0, 40(sp) -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi sp, s0, -96 -; RV64-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 96 -; RV64-NEXT: ret +; CHECK-LABEL: select_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; 
CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: ret %v = select i1 %c, <4 x double> %a, <4 x double> %b ret <4 x double> %v } define <4 x double> @selectcc_v4f64(double %a, double %b, <4 x double> %c, <4 x double> %d) { -; RV32-LABEL: selectcc_v4f64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -96 -; RV32-NEXT: .cfi_def_cfa_offset 96 -; RV32-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: addi s0, sp, 96 -; RV32-NEXT: .cfi_def_cfa s0, 0 -; RV32-NEXT: andi sp, sp, -32 -; RV32-NEXT: feq.d a0, fa0, fa1 -; RV32-NEXT: bnez a0, .LBB19_3 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v10 -; RV32-NEXT: fsd ft0, 32(sp) -; RV32-NEXT: beqz a0, .LBB19_4 -; RV32-NEXT: .LBB19_2: -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, mu -; RV32-NEXT: vslidedown.vi v26, v8, 3 -; RV32-NEXT: j .LBB19_5 -; RV32-NEXT: .LBB19_3: -; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fsd ft0, 32(sp) -; RV32-NEXT: bnez a0, .LBB19_2 -; RV32-NEXT: .LBB19_4: -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, mu -; RV32-NEXT: vslidedown.vi v26, v10, 3 -; RV32-NEXT: .LBB19_5: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsd ft0, 56(sp) -; RV32-NEXT: bnez a0, .LBB19_7 -; RV32-NEXT: # %bb.6: -; RV32-NEXT: vslidedown.vi v26, v10, 2 -; RV32-NEXT: j .LBB19_8 -; RV32-NEXT: .LBB19_7: -; RV32-NEXT: vslidedown.vi v26, v8, 2 -; RV32-NEXT: .LBB19_8: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsd ft0, 48(sp) -; RV32-NEXT: bnez a0, .LBB19_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: vslidedown.vi v26, v10, 1 -; RV32-NEXT: j .LBB19_11 -; RV32-NEXT: .LBB19_10: -; RV32-NEXT: vslidedown.vi v26, v8, 1 -; RV32-NEXT: .LBB19_11: -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: fsd ft0, 40(sp) -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; RV32-NEXT: addi a0, sp, 32 -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi sp, s0, -96 -; RV32-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 96 -; RV32-NEXT: ret -; -; RV64-LABEL: selectcc_v4f64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -96 -; RV64-NEXT: .cfi_def_cfa_offset 96 -; RV64-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: addi s0, sp, 96 -; RV64-NEXT: .cfi_def_cfa s0, 0 -; RV64-NEXT: andi sp, sp, -32 -; RV64-NEXT: feq.d a0, fa0, fa1 -; RV64-NEXT: bnez a0, .LBB19_3 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v10 -; RV64-NEXT: fsd ft0, 32(sp) -; RV64-NEXT: beqz a0, .LBB19_4 -; RV64-NEXT: .LBB19_2: -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, mu -; RV64-NEXT: vslidedown.vi v26, v8, 3 -; RV64-NEXT: j .LBB19_5 -; RV64-NEXT: .LBB19_3: -; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: fsd ft0, 32(sp) -; RV64-NEXT: bnez a0, .LBB19_2 -; RV64-NEXT: .LBB19_4: -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, mu -; RV64-NEXT: vslidedown.vi v26, v10, 3 -; RV64-NEXT: .LBB19_5: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsd ft0, 56(sp) -; RV64-NEXT: bnez a0, .LBB19_7 -; RV64-NEXT: # %bb.6: -; RV64-NEXT: vslidedown.vi v26, v10, 2 -; RV64-NEXT: j .LBB19_8 -; RV64-NEXT: .LBB19_7: -; 
RV64-NEXT: vslidedown.vi v26, v8, 2 -; RV64-NEXT: .LBB19_8: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsd ft0, 48(sp) -; RV64-NEXT: bnez a0, .LBB19_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: vslidedown.vi v26, v10, 1 -; RV64-NEXT: j .LBB19_11 -; RV64-NEXT: .LBB19_10: -; RV64-NEXT: vslidedown.vi v26, v8, 1 -; RV64-NEXT: .LBB19_11: -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: fsd ft0, 40(sp) -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi sp, s0, -96 -; RV64-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 96 -; RV64-NEXT: ret +; CHECK-LABEL: selectcc_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.d a0, fa0, fa1 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: ret %cmp = fcmp oeq double %a, %b %v = select i1 %cmp, <4 x double> %c, <4 x double> %d ret <4 x double> %v } define <8 x double> @select_v8f64(i1 zeroext %c, <8 x double> %a, <8 x double> %b) { -; RV32-LABEL: select_v8f64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -192 -; RV32-NEXT: .cfi_def_cfa_offset 192 -; RV32-NEXT: sw ra, 188(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 184(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: addi s0, sp, 192 -; RV32-NEXT: .cfi_def_cfa s0, 0 -; RV32-NEXT: andi sp, sp, -64 -; RV32-NEXT: bnez a0, .LBB20_3 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v12 -; RV32-NEXT: fsd ft0, 64(sp) -; RV32-NEXT: beqz a0, .LBB20_4 -; RV32-NEXT: .LBB20_2: -; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, mu -; RV32-NEXT: vslidedown.vi v28, v8, 7 -; RV32-NEXT: j .LBB20_5 -; RV32-NEXT: .LBB20_3: -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fsd ft0, 64(sp) -; RV32-NEXT: bnez a0, .LBB20_2 -; RV32-NEXT: .LBB20_4: -; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, mu -; RV32-NEXT: vslidedown.vi v28, v12, 7 -; RV32-NEXT: .LBB20_5: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsd ft0, 120(sp) -; RV32-NEXT: bnez a0, .LBB20_7 -; RV32-NEXT: # %bb.6: -; RV32-NEXT: vslidedown.vi v28, v12, 6 -; RV32-NEXT: j .LBB20_8 -; RV32-NEXT: .LBB20_7: -; RV32-NEXT: vslidedown.vi v28, v8, 6 -; RV32-NEXT: .LBB20_8: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsd ft0, 112(sp) -; RV32-NEXT: bnez a0, .LBB20_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: vslidedown.vi v28, v12, 5 -; RV32-NEXT: j .LBB20_11 -; RV32-NEXT: .LBB20_10: -; RV32-NEXT: vslidedown.vi v28, v8, 5 -; RV32-NEXT: .LBB20_11: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsd ft0, 104(sp) -; RV32-NEXT: bnez a0, .LBB20_13 -; RV32-NEXT: # %bb.12: -; RV32-NEXT: vslidedown.vi v28, v12, 4 -; RV32-NEXT: j .LBB20_14 -; RV32-NEXT: .LBB20_13: -; RV32-NEXT: vslidedown.vi v28, v8, 4 -; RV32-NEXT: .LBB20_14: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsd ft0, 96(sp) -; RV32-NEXT: bnez a0, .LBB20_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: vslidedown.vi v28, v12, 3 -; RV32-NEXT: j .LBB20_17 -; RV32-NEXT: .LBB20_16: -; RV32-NEXT: vslidedown.vi v28, v8, 3 -; RV32-NEXT: .LBB20_17: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsd ft0, 88(sp) -; RV32-NEXT: bnez a0, .LBB20_19 -; RV32-NEXT: # %bb.18: -; RV32-NEXT: vslidedown.vi v28, v12, 2 -; RV32-NEXT: j .LBB20_20 -; RV32-NEXT: .LBB20_19: -; RV32-NEXT: vslidedown.vi v28, v8, 2 -; 
RV32-NEXT: .LBB20_20: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsd ft0, 80(sp) -; RV32-NEXT: bnez a0, .LBB20_22 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: vslidedown.vi v28, v12, 1 -; RV32-NEXT: j .LBB20_23 -; RV32-NEXT: .LBB20_22: -; RV32-NEXT: vslidedown.vi v28, v8, 1 -; RV32-NEXT: .LBB20_23: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsd ft0, 72(sp) -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: addi a0, sp, 64 -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi sp, s0, -192 -; RV32-NEXT: lw s0, 184(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 188(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 192 -; RV32-NEXT: ret -; -; RV64-LABEL: select_v8f64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -192 -; RV64-NEXT: .cfi_def_cfa_offset 192 -; RV64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: addi s0, sp, 192 -; RV64-NEXT: .cfi_def_cfa s0, 0 -; RV64-NEXT: andi sp, sp, -64 -; RV64-NEXT: bnez a0, .LBB20_3 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v12 -; RV64-NEXT: fsd ft0, 64(sp) -; RV64-NEXT: beqz a0, .LBB20_4 -; RV64-NEXT: .LBB20_2: -; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, mu -; RV64-NEXT: vslidedown.vi v28, v8, 7 -; RV64-NEXT: j .LBB20_5 -; RV64-NEXT: .LBB20_3: -; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: fsd ft0, 64(sp) -; RV64-NEXT: bnez a0, .LBB20_2 -; RV64-NEXT: .LBB20_4: -; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, mu -; RV64-NEXT: vslidedown.vi v28, v12, 7 -; RV64-NEXT: .LBB20_5: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsd ft0, 120(sp) -; RV64-NEXT: bnez a0, .LBB20_7 -; RV64-NEXT: # %bb.6: -; RV64-NEXT: vslidedown.vi v28, v12, 6 -; RV64-NEXT: j .LBB20_8 -; RV64-NEXT: .LBB20_7: -; RV64-NEXT: vslidedown.vi v28, v8, 6 -; RV64-NEXT: .LBB20_8: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsd ft0, 112(sp) -; RV64-NEXT: bnez a0, .LBB20_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: vslidedown.vi v28, v12, 5 -; RV64-NEXT: j .LBB20_11 -; RV64-NEXT: .LBB20_10: -; RV64-NEXT: vslidedown.vi v28, v8, 5 -; RV64-NEXT: .LBB20_11: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsd ft0, 104(sp) -; RV64-NEXT: bnez a0, .LBB20_13 -; RV64-NEXT: # %bb.12: -; RV64-NEXT: vslidedown.vi v28, v12, 4 -; RV64-NEXT: j .LBB20_14 -; RV64-NEXT: .LBB20_13: -; RV64-NEXT: vslidedown.vi v28, v8, 4 -; RV64-NEXT: .LBB20_14: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsd ft0, 96(sp) -; RV64-NEXT: bnez a0, .LBB20_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: vslidedown.vi v28, v12, 3 -; RV64-NEXT: j .LBB20_17 -; RV64-NEXT: .LBB20_16: -; RV64-NEXT: vslidedown.vi v28, v8, 3 -; RV64-NEXT: .LBB20_17: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsd ft0, 88(sp) -; RV64-NEXT: bnez a0, .LBB20_19 -; RV64-NEXT: # %bb.18: -; RV64-NEXT: vslidedown.vi v28, v12, 2 -; RV64-NEXT: j .LBB20_20 -; RV64-NEXT: .LBB20_19: -; RV64-NEXT: vslidedown.vi v28, v8, 2 -; RV64-NEXT: .LBB20_20: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsd ft0, 80(sp) -; RV64-NEXT: bnez a0, .LBB20_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: vslidedown.vi v28, v12, 1 -; RV64-NEXT: j .LBB20_23 -; RV64-NEXT: .LBB20_22: -; RV64-NEXT: vslidedown.vi v28, v8, 1 -; RV64-NEXT: .LBB20_23: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsd ft0, 72(sp) -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: addi a0, sp, 64 -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi sp, s0, -192 -; RV64-NEXT: ld s0, 176(sp) # 
8-byte Folded Reload -; RV64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 192 -; RV64-NEXT: ret +; CHECK-LABEL: select_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: ret %v = select i1 %c, <8 x double> %a, <8 x double> %b ret <8 x double> %v } define <8 x double> @selectcc_v8f64(double %a, double %b, <8 x double> %c, <8 x double> %d) { -; RV32-LABEL: selectcc_v8f64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -192 -; RV32-NEXT: .cfi_def_cfa_offset 192 -; RV32-NEXT: sw ra, 188(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 184(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: addi s0, sp, 192 -; RV32-NEXT: .cfi_def_cfa s0, 0 -; RV32-NEXT: andi sp, sp, -64 -; RV32-NEXT: feq.d a0, fa0, fa1 -; RV32-NEXT: bnez a0, .LBB21_3 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v12 -; RV32-NEXT: fsd ft0, 64(sp) -; RV32-NEXT: beqz a0, .LBB21_4 -; RV32-NEXT: .LBB21_2: -; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, mu -; RV32-NEXT: vslidedown.vi v28, v8, 7 -; RV32-NEXT: j .LBB21_5 -; RV32-NEXT: .LBB21_3: -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fsd ft0, 64(sp) -; RV32-NEXT: bnez a0, .LBB21_2 -; RV32-NEXT: .LBB21_4: -; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, mu -; RV32-NEXT: vslidedown.vi v28, v12, 7 -; RV32-NEXT: .LBB21_5: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsd ft0, 120(sp) -; RV32-NEXT: bnez a0, .LBB21_7 -; RV32-NEXT: # %bb.6: -; RV32-NEXT: vslidedown.vi v28, v12, 6 -; RV32-NEXT: j .LBB21_8 -; RV32-NEXT: .LBB21_7: -; RV32-NEXT: vslidedown.vi v28, v8, 6 -; RV32-NEXT: .LBB21_8: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsd ft0, 112(sp) -; RV32-NEXT: bnez a0, .LBB21_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: vslidedown.vi v28, v12, 5 -; RV32-NEXT: j .LBB21_11 -; RV32-NEXT: .LBB21_10: -; RV32-NEXT: vslidedown.vi v28, v8, 5 -; RV32-NEXT: .LBB21_11: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsd ft0, 104(sp) -; RV32-NEXT: bnez a0, .LBB21_13 -; RV32-NEXT: # %bb.12: -; RV32-NEXT: vslidedown.vi v28, v12, 4 -; RV32-NEXT: j .LBB21_14 -; RV32-NEXT: .LBB21_13: -; RV32-NEXT: vslidedown.vi v28, v8, 4 -; RV32-NEXT: .LBB21_14: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsd ft0, 96(sp) -; RV32-NEXT: bnez a0, .LBB21_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: vslidedown.vi v28, v12, 3 -; RV32-NEXT: j .LBB21_17 -; RV32-NEXT: .LBB21_16: -; RV32-NEXT: vslidedown.vi v28, v8, 3 -; RV32-NEXT: .LBB21_17: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsd ft0, 88(sp) -; RV32-NEXT: bnez a0, .LBB21_19 -; RV32-NEXT: # %bb.18: -; RV32-NEXT: vslidedown.vi v28, v12, 2 -; RV32-NEXT: j .LBB21_20 -; RV32-NEXT: .LBB21_19: -; RV32-NEXT: vslidedown.vi v28, v8, 2 -; RV32-NEXT: .LBB21_20: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsd ft0, 80(sp) -; RV32-NEXT: bnez a0, .LBB21_22 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: vslidedown.vi v28, v12, 1 -; RV32-NEXT: j .LBB21_23 -; RV32-NEXT: .LBB21_22: -; RV32-NEXT: vslidedown.vi v28, v8, 1 -; RV32-NEXT: .LBB21_23: -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: fsd ft0, 72(sp) -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: addi a0, sp, 64 -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi sp, s0, -192 -; RV32-NEXT: lw s0, 184(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 188(sp) # 4-byte 
Folded Reload -; RV32-NEXT: addi sp, sp, 192 -; RV32-NEXT: ret -; -; RV64-LABEL: selectcc_v8f64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -192 -; RV64-NEXT: .cfi_def_cfa_offset 192 -; RV64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: addi s0, sp, 192 -; RV64-NEXT: .cfi_def_cfa s0, 0 -; RV64-NEXT: andi sp, sp, -64 -; RV64-NEXT: feq.d a0, fa0, fa1 -; RV64-NEXT: bnez a0, .LBB21_3 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v12 -; RV64-NEXT: fsd ft0, 64(sp) -; RV64-NEXT: beqz a0, .LBB21_4 -; RV64-NEXT: .LBB21_2: -; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, mu -; RV64-NEXT: vslidedown.vi v28, v8, 7 -; RV64-NEXT: j .LBB21_5 -; RV64-NEXT: .LBB21_3: -; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: fsd ft0, 64(sp) -; RV64-NEXT: bnez a0, .LBB21_2 -; RV64-NEXT: .LBB21_4: -; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, mu -; RV64-NEXT: vslidedown.vi v28, v12, 7 -; RV64-NEXT: .LBB21_5: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsd ft0, 120(sp) -; RV64-NEXT: bnez a0, .LBB21_7 -; RV64-NEXT: # %bb.6: -; RV64-NEXT: vslidedown.vi v28, v12, 6 -; RV64-NEXT: j .LBB21_8 -; RV64-NEXT: .LBB21_7: -; RV64-NEXT: vslidedown.vi v28, v8, 6 -; RV64-NEXT: .LBB21_8: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsd ft0, 112(sp) -; RV64-NEXT: bnez a0, .LBB21_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: vslidedown.vi v28, v12, 5 -; RV64-NEXT: j .LBB21_11 -; RV64-NEXT: .LBB21_10: -; RV64-NEXT: vslidedown.vi v28, v8, 5 -; RV64-NEXT: .LBB21_11: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsd ft0, 104(sp) -; RV64-NEXT: bnez a0, .LBB21_13 -; RV64-NEXT: # %bb.12: -; RV64-NEXT: vslidedown.vi v28, v12, 4 -; RV64-NEXT: j .LBB21_14 -; RV64-NEXT: .LBB21_13: -; RV64-NEXT: vslidedown.vi v28, v8, 4 -; RV64-NEXT: .LBB21_14: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsd ft0, 96(sp) -; RV64-NEXT: bnez a0, .LBB21_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: vslidedown.vi v28, v12, 3 -; RV64-NEXT: j .LBB21_17 -; RV64-NEXT: .LBB21_16: -; RV64-NEXT: vslidedown.vi v28, v8, 3 -; RV64-NEXT: .LBB21_17: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsd ft0, 88(sp) -; RV64-NEXT: bnez a0, .LBB21_19 -; RV64-NEXT: # %bb.18: -; RV64-NEXT: vslidedown.vi v28, v12, 2 -; RV64-NEXT: j .LBB21_20 -; RV64-NEXT: .LBB21_19: -; RV64-NEXT: vslidedown.vi v28, v8, 2 -; RV64-NEXT: .LBB21_20: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsd ft0, 80(sp) -; RV64-NEXT: bnez a0, .LBB21_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: vslidedown.vi v28, v12, 1 -; RV64-NEXT: j .LBB21_23 -; RV64-NEXT: .LBB21_22: -; RV64-NEXT: vslidedown.vi v28, v8, 1 -; RV64-NEXT: .LBB21_23: -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: fsd ft0, 72(sp) -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: addi a0, sp, 64 -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi sp, s0, -192 -; RV64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 192 -; RV64-NEXT: ret +; CHECK-LABEL: selectcc_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.d a0, fa0, fa1 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: ret %cmp = fcmp oeq double %a, %b %v = select i1 %cmp, <8 x double> %c, <8 x double> %d ret <8 x double> %v } define <16 x 
double> @select_v16f64(i1 zeroext %c, <16 x double> %a, <16 x double> %b) { -; RV32-LABEL: select_v16f64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -384 -; RV32-NEXT: .cfi_def_cfa_offset 384 -; RV32-NEXT: sw ra, 380(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 376(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: addi s0, sp, 384 -; RV32-NEXT: .cfi_def_cfa s0, 0 -; RV32-NEXT: andi sp, sp, -128 -; RV32-NEXT: bnez a0, .LBB22_3 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v16 -; RV32-NEXT: fsd ft0, 128(sp) -; RV32-NEXT: beqz a0, .LBB22_4 -; RV32-NEXT: .LBB22_2: -; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, mu -; RV32-NEXT: vslidedown.vi v24, v8, 15 -; RV32-NEXT: j .LBB22_5 -; RV32-NEXT: .LBB22_3: -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fsd ft0, 128(sp) -; RV32-NEXT: bnez a0, .LBB22_2 -; RV32-NEXT: .LBB22_4: -; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, mu -; RV32-NEXT: vslidedown.vi v24, v16, 15 -; RV32-NEXT: .LBB22_5: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 248(sp) -; RV32-NEXT: bnez a0, .LBB22_7 -; RV32-NEXT: # %bb.6: -; RV32-NEXT: vslidedown.vi v24, v16, 14 -; RV32-NEXT: j .LBB22_8 -; RV32-NEXT: .LBB22_7: -; RV32-NEXT: vslidedown.vi v24, v8, 14 -; RV32-NEXT: .LBB22_8: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 240(sp) -; RV32-NEXT: bnez a0, .LBB22_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: vslidedown.vi v24, v16, 13 -; RV32-NEXT: j .LBB22_11 -; RV32-NEXT: .LBB22_10: -; RV32-NEXT: vslidedown.vi v24, v8, 13 -; RV32-NEXT: .LBB22_11: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 232(sp) -; RV32-NEXT: bnez a0, .LBB22_13 -; RV32-NEXT: # %bb.12: -; RV32-NEXT: vslidedown.vi v24, v16, 12 -; RV32-NEXT: j .LBB22_14 -; RV32-NEXT: .LBB22_13: -; RV32-NEXT: vslidedown.vi v24, v8, 12 -; RV32-NEXT: .LBB22_14: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 224(sp) -; RV32-NEXT: bnez a0, .LBB22_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: vslidedown.vi v24, v16, 11 -; RV32-NEXT: j .LBB22_17 -; RV32-NEXT: .LBB22_16: -; RV32-NEXT: vslidedown.vi v24, v8, 11 -; RV32-NEXT: .LBB22_17: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 216(sp) -; RV32-NEXT: bnez a0, .LBB22_19 -; RV32-NEXT: # %bb.18: -; RV32-NEXT: vslidedown.vi v24, v16, 10 -; RV32-NEXT: j .LBB22_20 -; RV32-NEXT: .LBB22_19: -; RV32-NEXT: vslidedown.vi v24, v8, 10 -; RV32-NEXT: .LBB22_20: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 208(sp) -; RV32-NEXT: bnez a0, .LBB22_22 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: vslidedown.vi v24, v16, 9 -; RV32-NEXT: j .LBB22_23 -; RV32-NEXT: .LBB22_22: -; RV32-NEXT: vslidedown.vi v24, v8, 9 -; RV32-NEXT: .LBB22_23: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 200(sp) -; RV32-NEXT: bnez a0, .LBB22_25 -; RV32-NEXT: # %bb.24: -; RV32-NEXT: vslidedown.vi v24, v16, 8 -; RV32-NEXT: j .LBB22_26 -; RV32-NEXT: .LBB22_25: -; RV32-NEXT: vslidedown.vi v24, v8, 8 -; RV32-NEXT: .LBB22_26: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 192(sp) -; RV32-NEXT: bnez a0, .LBB22_28 -; RV32-NEXT: # %bb.27: -; RV32-NEXT: vslidedown.vi v24, v16, 7 -; RV32-NEXT: j .LBB22_29 -; RV32-NEXT: .LBB22_28: -; RV32-NEXT: vslidedown.vi v24, v8, 7 -; RV32-NEXT: .LBB22_29: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 184(sp) -; RV32-NEXT: bnez a0, .LBB22_31 -; RV32-NEXT: # %bb.30: -; RV32-NEXT: vslidedown.vi v24, v16, 6 -; RV32-NEXT: j .LBB22_32 -; RV32-NEXT: .LBB22_31: -; RV32-NEXT: vslidedown.vi v24, v8, 6 
-; RV32-NEXT: .LBB22_32: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 176(sp) -; RV32-NEXT: bnez a0, .LBB22_34 -; RV32-NEXT: # %bb.33: -; RV32-NEXT: vslidedown.vi v24, v16, 5 -; RV32-NEXT: j .LBB22_35 -; RV32-NEXT: .LBB22_34: -; RV32-NEXT: vslidedown.vi v24, v8, 5 -; RV32-NEXT: .LBB22_35: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 168(sp) -; RV32-NEXT: bnez a0, .LBB22_37 -; RV32-NEXT: # %bb.36: -; RV32-NEXT: vslidedown.vi v24, v16, 4 -; RV32-NEXT: j .LBB22_38 -; RV32-NEXT: .LBB22_37: -; RV32-NEXT: vslidedown.vi v24, v8, 4 -; RV32-NEXT: .LBB22_38: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 160(sp) -; RV32-NEXT: bnez a0, .LBB22_40 -; RV32-NEXT: # %bb.39: -; RV32-NEXT: vslidedown.vi v24, v16, 3 -; RV32-NEXT: j .LBB22_41 -; RV32-NEXT: .LBB22_40: -; RV32-NEXT: vslidedown.vi v24, v8, 3 -; RV32-NEXT: .LBB22_41: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 152(sp) -; RV32-NEXT: bnez a0, .LBB22_43 -; RV32-NEXT: # %bb.42: -; RV32-NEXT: vslidedown.vi v24, v16, 2 -; RV32-NEXT: j .LBB22_44 -; RV32-NEXT: .LBB22_43: -; RV32-NEXT: vslidedown.vi v24, v8, 2 -; RV32-NEXT: .LBB22_44: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 144(sp) -; RV32-NEXT: bnez a0, .LBB22_46 -; RV32-NEXT: # %bb.45: -; RV32-NEXT: vslidedown.vi v8, v16, 1 -; RV32-NEXT: j .LBB22_47 -; RV32-NEXT: .LBB22_46: -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: .LBB22_47: -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fsd ft0, 136(sp) -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: addi a0, sp, 128 -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi sp, s0, -384 -; RV32-NEXT: lw s0, 376(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 380(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 384 -; RV32-NEXT: ret -; -; RV64-LABEL: select_v16f64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -384 -; RV64-NEXT: .cfi_def_cfa_offset 384 -; RV64-NEXT: sd ra, 376(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: addi s0, sp, 384 -; RV64-NEXT: .cfi_def_cfa s0, 0 -; RV64-NEXT: andi sp, sp, -128 -; RV64-NEXT: bnez a0, .LBB22_3 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v16 -; RV64-NEXT: fsd ft0, 128(sp) -; RV64-NEXT: beqz a0, .LBB22_4 -; RV64-NEXT: .LBB22_2: -; RV64-NEXT: vsetivli zero, 1, e64, m8, ta, mu -; RV64-NEXT: vslidedown.vi v24, v8, 15 -; RV64-NEXT: j .LBB22_5 -; RV64-NEXT: .LBB22_3: -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: fsd ft0, 128(sp) -; RV64-NEXT: bnez a0, .LBB22_2 -; RV64-NEXT: .LBB22_4: -; RV64-NEXT: vsetivli zero, 1, e64, m8, ta, mu -; RV64-NEXT: vslidedown.vi v24, v16, 15 -; RV64-NEXT: .LBB22_5: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 248(sp) -; RV64-NEXT: bnez a0, .LBB22_7 -; RV64-NEXT: # %bb.6: -; RV64-NEXT: vslidedown.vi v24, v16, 14 -; RV64-NEXT: j .LBB22_8 -; RV64-NEXT: .LBB22_7: -; RV64-NEXT: vslidedown.vi v24, v8, 14 -; RV64-NEXT: .LBB22_8: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 240(sp) -; RV64-NEXT: bnez a0, .LBB22_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: vslidedown.vi v24, v16, 13 -; RV64-NEXT: j .LBB22_11 -; RV64-NEXT: .LBB22_10: -; RV64-NEXT: vslidedown.vi v24, v8, 13 -; RV64-NEXT: .LBB22_11: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 232(sp) -; RV64-NEXT: bnez a0, .LBB22_13 -; RV64-NEXT: # %bb.12: -; RV64-NEXT: vslidedown.vi v24, v16, 12 -; RV64-NEXT: j .LBB22_14 -; RV64-NEXT: .LBB22_13: -; 
RV64-NEXT: vslidedown.vi v24, v8, 12 -; RV64-NEXT: .LBB22_14: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 224(sp) -; RV64-NEXT: bnez a0, .LBB22_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: vslidedown.vi v24, v16, 11 -; RV64-NEXT: j .LBB22_17 -; RV64-NEXT: .LBB22_16: -; RV64-NEXT: vslidedown.vi v24, v8, 11 -; RV64-NEXT: .LBB22_17: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 216(sp) -; RV64-NEXT: bnez a0, .LBB22_19 -; RV64-NEXT: # %bb.18: -; RV64-NEXT: vslidedown.vi v24, v16, 10 -; RV64-NEXT: j .LBB22_20 -; RV64-NEXT: .LBB22_19: -; RV64-NEXT: vslidedown.vi v24, v8, 10 -; RV64-NEXT: .LBB22_20: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 208(sp) -; RV64-NEXT: bnez a0, .LBB22_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: vslidedown.vi v24, v16, 9 -; RV64-NEXT: j .LBB22_23 -; RV64-NEXT: .LBB22_22: -; RV64-NEXT: vslidedown.vi v24, v8, 9 -; RV64-NEXT: .LBB22_23: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 200(sp) -; RV64-NEXT: bnez a0, .LBB22_25 -; RV64-NEXT: # %bb.24: -; RV64-NEXT: vslidedown.vi v24, v16, 8 -; RV64-NEXT: j .LBB22_26 -; RV64-NEXT: .LBB22_25: -; RV64-NEXT: vslidedown.vi v24, v8, 8 -; RV64-NEXT: .LBB22_26: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 192(sp) -; RV64-NEXT: bnez a0, .LBB22_28 -; RV64-NEXT: # %bb.27: -; RV64-NEXT: vslidedown.vi v24, v16, 7 -; RV64-NEXT: j .LBB22_29 -; RV64-NEXT: .LBB22_28: -; RV64-NEXT: vslidedown.vi v24, v8, 7 -; RV64-NEXT: .LBB22_29: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 184(sp) -; RV64-NEXT: bnez a0, .LBB22_31 -; RV64-NEXT: # %bb.30: -; RV64-NEXT: vslidedown.vi v24, v16, 6 -; RV64-NEXT: j .LBB22_32 -; RV64-NEXT: .LBB22_31: -; RV64-NEXT: vslidedown.vi v24, v8, 6 -; RV64-NEXT: .LBB22_32: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 176(sp) -; RV64-NEXT: bnez a0, .LBB22_34 -; RV64-NEXT: # %bb.33: -; RV64-NEXT: vslidedown.vi v24, v16, 5 -; RV64-NEXT: j .LBB22_35 -; RV64-NEXT: .LBB22_34: -; RV64-NEXT: vslidedown.vi v24, v8, 5 -; RV64-NEXT: .LBB22_35: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 168(sp) -; RV64-NEXT: bnez a0, .LBB22_37 -; RV64-NEXT: # %bb.36: -; RV64-NEXT: vslidedown.vi v24, v16, 4 -; RV64-NEXT: j .LBB22_38 -; RV64-NEXT: .LBB22_37: -; RV64-NEXT: vslidedown.vi v24, v8, 4 -; RV64-NEXT: .LBB22_38: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 160(sp) -; RV64-NEXT: bnez a0, .LBB22_40 -; RV64-NEXT: # %bb.39: -; RV64-NEXT: vslidedown.vi v24, v16, 3 -; RV64-NEXT: j .LBB22_41 -; RV64-NEXT: .LBB22_40: -; RV64-NEXT: vslidedown.vi v24, v8, 3 -; RV64-NEXT: .LBB22_41: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 152(sp) -; RV64-NEXT: bnez a0, .LBB22_43 -; RV64-NEXT: # %bb.42: -; RV64-NEXT: vslidedown.vi v24, v16, 2 -; RV64-NEXT: j .LBB22_44 -; RV64-NEXT: .LBB22_43: -; RV64-NEXT: vslidedown.vi v24, v8, 2 -; RV64-NEXT: .LBB22_44: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 144(sp) -; RV64-NEXT: bnez a0, .LBB22_46 -; RV64-NEXT: # %bb.45: -; RV64-NEXT: vslidedown.vi v8, v16, 1 -; RV64-NEXT: j .LBB22_47 -; RV64-NEXT: .LBB22_46: -; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: .LBB22_47: -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: fsd ft0, 136(sp) -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: addi a0, sp, 128 -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi sp, s0, -384 -; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 384 -; RV64-NEXT: ret +; CHECK-LABEL: select_v16f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; 
CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: ret %v = select i1 %c, <16 x double> %a, <16 x double> %b ret <16 x double> %v } define <16 x double> @selectcc_v16f64(double %a, double %b, <16 x double> %c, <16 x double> %d) { -; RV32-LABEL: selectcc_v16f64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -384 -; RV32-NEXT: .cfi_def_cfa_offset 384 -; RV32-NEXT: sw ra, 380(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 376(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: addi s0, sp, 384 -; RV32-NEXT: .cfi_def_cfa s0, 0 -; RV32-NEXT: andi sp, sp, -128 -; RV32-NEXT: feq.d a0, fa0, fa1 -; RV32-NEXT: bnez a0, .LBB23_3 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v16 -; RV32-NEXT: fsd ft0, 128(sp) -; RV32-NEXT: beqz a0, .LBB23_4 -; RV32-NEXT: .LBB23_2: -; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, mu -; RV32-NEXT: vslidedown.vi v24, v8, 15 -; RV32-NEXT: j .LBB23_5 -; RV32-NEXT: .LBB23_3: -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fsd ft0, 128(sp) -; RV32-NEXT: bnez a0, .LBB23_2 -; RV32-NEXT: .LBB23_4: -; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, mu -; RV32-NEXT: vslidedown.vi v24, v16, 15 -; RV32-NEXT: .LBB23_5: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 248(sp) -; RV32-NEXT: bnez a0, .LBB23_7 -; RV32-NEXT: # %bb.6: -; RV32-NEXT: vslidedown.vi v24, v16, 14 -; RV32-NEXT: j .LBB23_8 -; RV32-NEXT: .LBB23_7: -; RV32-NEXT: vslidedown.vi v24, v8, 14 -; RV32-NEXT: .LBB23_8: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 240(sp) -; RV32-NEXT: bnez a0, .LBB23_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: vslidedown.vi v24, v16, 13 -; RV32-NEXT: j .LBB23_11 -; RV32-NEXT: .LBB23_10: -; RV32-NEXT: vslidedown.vi v24, v8, 13 -; RV32-NEXT: .LBB23_11: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 232(sp) -; RV32-NEXT: bnez a0, .LBB23_13 -; RV32-NEXT: # %bb.12: -; RV32-NEXT: vslidedown.vi v24, v16, 12 -; RV32-NEXT: j .LBB23_14 -; RV32-NEXT: .LBB23_13: -; RV32-NEXT: vslidedown.vi v24, v8, 12 -; RV32-NEXT: .LBB23_14: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 224(sp) -; RV32-NEXT: bnez a0, .LBB23_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: vslidedown.vi v24, v16, 11 -; RV32-NEXT: j .LBB23_17 -; RV32-NEXT: .LBB23_16: -; RV32-NEXT: vslidedown.vi v24, v8, 11 -; RV32-NEXT: .LBB23_17: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 216(sp) -; RV32-NEXT: bnez a0, .LBB23_19 -; RV32-NEXT: # %bb.18: -; RV32-NEXT: vslidedown.vi v24, v16, 10 -; RV32-NEXT: j .LBB23_20 -; RV32-NEXT: .LBB23_19: -; RV32-NEXT: vslidedown.vi v24, v8, 10 -; RV32-NEXT: .LBB23_20: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 208(sp) -; RV32-NEXT: bnez a0, .LBB23_22 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: vslidedown.vi v24, v16, 9 -; RV32-NEXT: j .LBB23_23 -; RV32-NEXT: .LBB23_22: -; RV32-NEXT: vslidedown.vi v24, v8, 9 -; RV32-NEXT: .LBB23_23: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 200(sp) -; RV32-NEXT: bnez a0, .LBB23_25 -; RV32-NEXT: # %bb.24: -; RV32-NEXT: vslidedown.vi v24, v16, 8 -; RV32-NEXT: j .LBB23_26 -; RV32-NEXT: .LBB23_25: -; RV32-NEXT: vslidedown.vi v24, v8, 8 -; RV32-NEXT: .LBB23_26: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 192(sp) -; RV32-NEXT: bnez a0, .LBB23_28 -; RV32-NEXT: # %bb.27: -; RV32-NEXT: vslidedown.vi v24, v16, 7 -; RV32-NEXT: j .LBB23_29 -; RV32-NEXT: .LBB23_28: -; 
RV32-NEXT: vslidedown.vi v24, v8, 7 -; RV32-NEXT: .LBB23_29: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 184(sp) -; RV32-NEXT: bnez a0, .LBB23_31 -; RV32-NEXT: # %bb.30: -; RV32-NEXT: vslidedown.vi v24, v16, 6 -; RV32-NEXT: j .LBB23_32 -; RV32-NEXT: .LBB23_31: -; RV32-NEXT: vslidedown.vi v24, v8, 6 -; RV32-NEXT: .LBB23_32: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 176(sp) -; RV32-NEXT: bnez a0, .LBB23_34 -; RV32-NEXT: # %bb.33: -; RV32-NEXT: vslidedown.vi v24, v16, 5 -; RV32-NEXT: j .LBB23_35 -; RV32-NEXT: .LBB23_34: -; RV32-NEXT: vslidedown.vi v24, v8, 5 -; RV32-NEXT: .LBB23_35: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 168(sp) -; RV32-NEXT: bnez a0, .LBB23_37 -; RV32-NEXT: # %bb.36: -; RV32-NEXT: vslidedown.vi v24, v16, 4 -; RV32-NEXT: j .LBB23_38 -; RV32-NEXT: .LBB23_37: -; RV32-NEXT: vslidedown.vi v24, v8, 4 -; RV32-NEXT: .LBB23_38: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 160(sp) -; RV32-NEXT: bnez a0, .LBB23_40 -; RV32-NEXT: # %bb.39: -; RV32-NEXT: vslidedown.vi v24, v16, 3 -; RV32-NEXT: j .LBB23_41 -; RV32-NEXT: .LBB23_40: -; RV32-NEXT: vslidedown.vi v24, v8, 3 -; RV32-NEXT: .LBB23_41: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 152(sp) -; RV32-NEXT: bnez a0, .LBB23_43 -; RV32-NEXT: # %bb.42: -; RV32-NEXT: vslidedown.vi v24, v16, 2 -; RV32-NEXT: j .LBB23_44 -; RV32-NEXT: .LBB23_43: -; RV32-NEXT: vslidedown.vi v24, v8, 2 -; RV32-NEXT: .LBB23_44: -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: fsd ft0, 144(sp) -; RV32-NEXT: bnez a0, .LBB23_46 -; RV32-NEXT: # %bb.45: -; RV32-NEXT: vslidedown.vi v8, v16, 1 -; RV32-NEXT: j .LBB23_47 -; RV32-NEXT: .LBB23_46: -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: .LBB23_47: -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fsd ft0, 136(sp) -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: addi a0, sp, 128 -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi sp, s0, -384 -; RV32-NEXT: lw s0, 376(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 380(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 384 -; RV32-NEXT: ret -; -; RV64-LABEL: selectcc_v16f64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -384 -; RV64-NEXT: .cfi_def_cfa_offset 384 -; RV64-NEXT: sd ra, 376(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: addi s0, sp, 384 -; RV64-NEXT: .cfi_def_cfa s0, 0 -; RV64-NEXT: andi sp, sp, -128 -; RV64-NEXT: feq.d a0, fa0, fa1 -; RV64-NEXT: bnez a0, .LBB23_3 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v16 -; RV64-NEXT: fsd ft0, 128(sp) -; RV64-NEXT: beqz a0, .LBB23_4 -; RV64-NEXT: .LBB23_2: -; RV64-NEXT: vsetivli zero, 1, e64, m8, ta, mu -; RV64-NEXT: vslidedown.vi v24, v8, 15 -; RV64-NEXT: j .LBB23_5 -; RV64-NEXT: .LBB23_3: -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: fsd ft0, 128(sp) -; RV64-NEXT: bnez a0, .LBB23_2 -; RV64-NEXT: .LBB23_4: -; RV64-NEXT: vsetivli zero, 1, e64, m8, ta, mu -; RV64-NEXT: vslidedown.vi v24, v16, 15 -; RV64-NEXT: .LBB23_5: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 248(sp) -; RV64-NEXT: bnez a0, .LBB23_7 -; RV64-NEXT: # %bb.6: -; RV64-NEXT: vslidedown.vi v24, v16, 14 -; RV64-NEXT: j .LBB23_8 -; RV64-NEXT: .LBB23_7: -; RV64-NEXT: vslidedown.vi v24, v8, 14 -; RV64-NEXT: .LBB23_8: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 240(sp) -; RV64-NEXT: bnez a0, .LBB23_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: vslidedown.vi v24, 
v16, 13 -; RV64-NEXT: j .LBB23_11 -; RV64-NEXT: .LBB23_10: -; RV64-NEXT: vslidedown.vi v24, v8, 13 -; RV64-NEXT: .LBB23_11: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 232(sp) -; RV64-NEXT: bnez a0, .LBB23_13 -; RV64-NEXT: # %bb.12: -; RV64-NEXT: vslidedown.vi v24, v16, 12 -; RV64-NEXT: j .LBB23_14 -; RV64-NEXT: .LBB23_13: -; RV64-NEXT: vslidedown.vi v24, v8, 12 -; RV64-NEXT: .LBB23_14: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 224(sp) -; RV64-NEXT: bnez a0, .LBB23_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: vslidedown.vi v24, v16, 11 -; RV64-NEXT: j .LBB23_17 -; RV64-NEXT: .LBB23_16: -; RV64-NEXT: vslidedown.vi v24, v8, 11 -; RV64-NEXT: .LBB23_17: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 216(sp) -; RV64-NEXT: bnez a0, .LBB23_19 -; RV64-NEXT: # %bb.18: -; RV64-NEXT: vslidedown.vi v24, v16, 10 -; RV64-NEXT: j .LBB23_20 -; RV64-NEXT: .LBB23_19: -; RV64-NEXT: vslidedown.vi v24, v8, 10 -; RV64-NEXT: .LBB23_20: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 208(sp) -; RV64-NEXT: bnez a0, .LBB23_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: vslidedown.vi v24, v16, 9 -; RV64-NEXT: j .LBB23_23 -; RV64-NEXT: .LBB23_22: -; RV64-NEXT: vslidedown.vi v24, v8, 9 -; RV64-NEXT: .LBB23_23: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 200(sp) -; RV64-NEXT: bnez a0, .LBB23_25 -; RV64-NEXT: # %bb.24: -; RV64-NEXT: vslidedown.vi v24, v16, 8 -; RV64-NEXT: j .LBB23_26 -; RV64-NEXT: .LBB23_25: -; RV64-NEXT: vslidedown.vi v24, v8, 8 -; RV64-NEXT: .LBB23_26: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 192(sp) -; RV64-NEXT: bnez a0, .LBB23_28 -; RV64-NEXT: # %bb.27: -; RV64-NEXT: vslidedown.vi v24, v16, 7 -; RV64-NEXT: j .LBB23_29 -; RV64-NEXT: .LBB23_28: -; RV64-NEXT: vslidedown.vi v24, v8, 7 -; RV64-NEXT: .LBB23_29: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 184(sp) -; RV64-NEXT: bnez a0, .LBB23_31 -; RV64-NEXT: # %bb.30: -; RV64-NEXT: vslidedown.vi v24, v16, 6 -; RV64-NEXT: j .LBB23_32 -; RV64-NEXT: .LBB23_31: -; RV64-NEXT: vslidedown.vi v24, v8, 6 -; RV64-NEXT: .LBB23_32: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 176(sp) -; RV64-NEXT: bnez a0, .LBB23_34 -; RV64-NEXT: # %bb.33: -; RV64-NEXT: vslidedown.vi v24, v16, 5 -; RV64-NEXT: j .LBB23_35 -; RV64-NEXT: .LBB23_34: -; RV64-NEXT: vslidedown.vi v24, v8, 5 -; RV64-NEXT: .LBB23_35: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 168(sp) -; RV64-NEXT: bnez a0, .LBB23_37 -; RV64-NEXT: # %bb.36: -; RV64-NEXT: vslidedown.vi v24, v16, 4 -; RV64-NEXT: j .LBB23_38 -; RV64-NEXT: .LBB23_37: -; RV64-NEXT: vslidedown.vi v24, v8, 4 -; RV64-NEXT: .LBB23_38: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 160(sp) -; RV64-NEXT: bnez a0, .LBB23_40 -; RV64-NEXT: # %bb.39: -; RV64-NEXT: vslidedown.vi v24, v16, 3 -; RV64-NEXT: j .LBB23_41 -; RV64-NEXT: .LBB23_40: -; RV64-NEXT: vslidedown.vi v24, v8, 3 -; RV64-NEXT: .LBB23_41: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 152(sp) -; RV64-NEXT: bnez a0, .LBB23_43 -; RV64-NEXT: # %bb.42: -; RV64-NEXT: vslidedown.vi v24, v16, 2 -; RV64-NEXT: j .LBB23_44 -; RV64-NEXT: .LBB23_43: -; RV64-NEXT: vslidedown.vi v24, v8, 2 -; RV64-NEXT: .LBB23_44: -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: fsd ft0, 144(sp) -; RV64-NEXT: bnez a0, .LBB23_46 -; RV64-NEXT: # %bb.45: -; RV64-NEXT: vslidedown.vi v8, v16, 1 -; RV64-NEXT: j .LBB23_47 -; RV64-NEXT: .LBB23_46: -; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: .LBB23_47: -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: fsd ft0, 136(sp) -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: 
addi a0, sp, 128 -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi sp, s0, -384 -; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 384 -; RV64-NEXT: ret +; CHECK-LABEL: selectcc_v16f64: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.d a0, fa0, fa1 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: ret %cmp = fcmp oeq double %a, %b %v = select i1 %cmp, <16 x double> %c, <16 x double> %d ret <16 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll @@ -7,13 +7,8 @@ define <1 x i1> @select_v1i1(i1 zeroext %c, <1 x i1> %a, <1 x i1> %b) { ; CHECK-LABEL: select_v1i1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, 1 -; CHECK-NEXT: bnez a0, .LBB0_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu -; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vmsne.vi v25, v25, 0 ; CHECK-NEXT: vmandnot.mm v26, v8, v25 ; CHECK-NEXT: vmand.mm v25, v0, v25 @@ -27,12 +22,7 @@ ; CHECK-LABEL: selectcc_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: xor a0, a0, a1 -; CHECK-NEXT: andi a1, a0, 1 -; CHECK-NEXT: addi a0, zero, 1 -; CHECK-NEXT: bnez a1, .LBB1_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vmsne.vi v25, v25, 0 @@ -48,13 +38,8 @@ define <2 x i1> @select_v2i1(i1 zeroext %c, <2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: select_v2i1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, 1 -; CHECK-NEXT: bnez a0, .LBB2_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu -; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vmsne.vi v25, v25, 0 ; CHECK-NEXT: vmandnot.mm v26, v8, v25 ; CHECK-NEXT: vmand.mm v25, v0, v25 @@ -68,12 +53,7 @@ ; CHECK-LABEL: selectcc_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: xor a0, a0, a1 -; CHECK-NEXT: andi a1, a0, 1 -; CHECK-NEXT: addi a0, zero, 1 -; CHECK-NEXT: bnez a1, .LBB3_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vmsne.vi v25, v25, 0 @@ -89,13 +69,8 @@ define <4 x i1> @select_v4i1(i1 zeroext %c, <4 x i1> %a, <4 x i1> %b) { ; CHECK-LABEL: select_v4i1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, 1 -; CHECK-NEXT: bnez a0, .LBB4_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB4_2: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu -; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vmsne.vi v25, v25, 0 ; CHECK-NEXT: vmandnot.mm v26, v8, v25 ; CHECK-NEXT: vmand.mm v25, v0, v25 @@ -109,12 +84,7 @@ ; CHECK-LABEL: selectcc_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: xor a0, a0, a1 -; CHECK-NEXT: andi a1, a0, 1 -; CHECK-NEXT: addi a0, zero, 1 -; CHECK-NEXT: bnez a1, .LBB5_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu ; 
CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vmsne.vi v25, v25, 0 @@ -130,13 +100,8 @@ define <8 x i1> @select_v8i1(i1 zeroext %c, <8 x i1> %a, <8 x i1> %b) { ; CHECK-LABEL: select_v8i1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, 1 -; CHECK-NEXT: bnez a0, .LBB6_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB6_2: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vmsne.vi v25, v25, 0 ; CHECK-NEXT: vmandnot.mm v26, v8, v25 ; CHECK-NEXT: vmand.mm v25, v0, v25 @@ -150,12 +115,7 @@ ; CHECK-LABEL: selectcc_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: xor a0, a0, a1 -; CHECK-NEXT: andi a1, a0, 1 -; CHECK-NEXT: addi a0, zero, 1 -; CHECK-NEXT: bnez a1, .LBB7_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vmsne.vi v25, v25, 0 @@ -171,13 +131,8 @@ define <16 x i1> @select_v16i1(i1 zeroext %c, <16 x i1> %a, <16 x i1> %b) { ; CHECK-LABEL: select_v16i1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, 1 -; CHECK-NEXT: bnez a0, .LBB8_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB8_2: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vmsne.vi v25, v25, 0 ; CHECK-NEXT: vmandnot.mm v26, v8, v25 ; CHECK-NEXT: vmand.mm v25, v0, v25 @@ -191,12 +146,7 @@ ; CHECK-LABEL: selectcc_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: xor a0, a0, a1 -; CHECK-NEXT: andi a1, a0, 1 -; CHECK-NEXT: addi a0, zero, 1 -; CHECK-NEXT: bnez a1, .LBB9_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB9_2: +; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vmsne.vi v25, v25, 0 @@ -212,17 +162,10 @@ define <2 x i8> @select_v2i8(i1 zeroext %c, <2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: select_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB10_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB10_2: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, <2 x i8> %a, <2 x i8> %b ret <2 x i8> %v @@ -231,17 +174,12 @@ define <2 x i8> @selectcc_v2i8(i8 signext %a, i8 signext %b, <2 x i8> %c, <2 x i8> %d) { ; CHECK-LABEL: selectcc_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB11_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a2 -; CHECK-NEXT: vmv.v.x v26, a2 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i8 %a, %b %v = select i1 %cmp, <2 x i8> %c, <2 x i8> %d @@ -251,17 +189,10 @@ define <4 x i8> @select_v4i8(i1 zeroext %c, <4 x i8> %a, <4 x i8> %b) { ; CHECK-LABEL: select_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, 
.LBB12_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB12_2: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, <4 x i8> %a, <4 x i8> %b ret <4 x i8> %v @@ -270,17 +201,12 @@ define <4 x i8> @selectcc_v4i8(i8 signext %a, i8 signext %b, <4 x i8> %c, <4 x i8> %d) { ; CHECK-LABEL: selectcc_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB13_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB13_2: +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a2 -; CHECK-NEXT: vmv.v.x v26, a2 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i8 %a, %b %v = select i1 %cmp, <4 x i8> %c, <4 x i8> %d @@ -290,17 +216,10 @@ define <8 x i8> @select_v8i8(i1 zeroext %c, <8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: select_v8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB14_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB14_2: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, <8 x i8> %a, <8 x i8> %b ret <8 x i8> %v @@ -309,17 +228,12 @@ define <8 x i8> @selectcc_v8i8(i8 signext %a, i8 signext %b, <8 x i8> %c, <8 x i8> %d) { ; CHECK-LABEL: selectcc_v8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB15_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB15_2: +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a2 -; CHECK-NEXT: vmv.v.x v26, a2 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i8 %a, %b %v = select i1 %cmp, <8 x i8> %c, <8 x i8> %d @@ -329,17 +243,10 @@ define <16 x i8> @select_v16i8(i1 zeroext %c, <16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: select_v16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB16_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB16_2: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, <16 x i8> %a, <16 x i8> %b ret <16 x i8> %v @@ -348,17 +255,12 @@ define <16 x i8> @selectcc_v16i8(i8 signext %a, i8 signext %b, <16 x i8> %c, <16 x i8> %d) { ; 
CHECK-LABEL: selectcc_v16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB17_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB17_2: +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a2 -; CHECK-NEXT: vmv.v.x v26, a2 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i8 %a, %b %v = select i1 %cmp, <16 x i8> %c, <16 x i8> %d @@ -368,17 +270,11 @@ define <2 x i16> @select_v2i16(i1 zeroext %c, <2 x i16> %a, <2 x i16> %b) { ; CHECK-LABEL: select_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB18_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB18_2: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, <2 x i16> %a, <2 x i16> %b ret <2 x i16> %v @@ -387,17 +283,13 @@ define <2 x i16> @selectcc_v2i16(i16 signext %a, i16 signext %b, <2 x i16> %c, <2 x i16> %d) { ; CHECK-LABEL: selectcc_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB19_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB19_2: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a2 -; CHECK-NEXT: vmv.v.x v26, a2 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i16 %a, %b %v = select i1 %cmp, <2 x i16> %c, <2 x i16> %d @@ -407,17 +299,11 @@ define <4 x i16> @select_v4i16(i1 zeroext %c, <4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: select_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB20_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB20_2: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, <4 x i16> %a, <4 x i16> %b ret <4 x i16> %v @@ -426,17 +312,13 @@ define <4 x i16> @selectcc_v4i16(i16 signext %a, i16 signext %b, <4 x i16> %c, <4 x i16> %d) { ; CHECK-LABEL: selectcc_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB21_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB21_2: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a2 
-; CHECK-NEXT: vmv.v.x v26, a2 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i16 %a, %b %v = select i1 %cmp, <4 x i16> %c, <4 x i16> %d @@ -446,17 +328,11 @@ define <8 x i16> @select_v8i16(i1 zeroext %c, <8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: select_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB22_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB22_2: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, <8 x i16> %a, <8 x i16> %b ret <8 x i16> %v @@ -465,17 +341,13 @@ define <8 x i16> @selectcc_v8i16(i16 signext %a, i16 signext %b, <8 x i16> %c, <8 x i16> %d) { ; CHECK-LABEL: selectcc_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB23_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB23_2: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a2 -; CHECK-NEXT: vmv.v.x v26, a2 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i16 %a, %b %v = select i1 %cmp, <8 x i16> %c, <8 x i16> %d @@ -485,17 +357,11 @@ define <16 x i16> @select_v16i16(i1 zeroext %c, <16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: select_v16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB24_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB24_2: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vand.vx v26, v8, a1 -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: vxor.vi v28, v28, -1 -; CHECK-NEXT: vand.vv v28, v10, v28 -; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, <16 x i16> %a, <16 x i16> %b ret <16 x i16> %v @@ -504,17 +370,13 @@ define <16 x i16> @selectcc_v16i16(i16 signext %a, i16 signext %b, <16 x i16> %c, <16 x i16> %d) { ; CHECK-LABEL: selectcc_v16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB25_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB25_2: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vand.vx v26, v8, a2 -; CHECK-NEXT: vmv.v.x v28, a2 -; CHECK-NEXT: vxor.vi v28, v28, -1 -; CHECK-NEXT: vand.vv v28, v10, v28 -; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: xor a0, 
a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i16 %a, %b %v = select i1 %cmp, <16 x i16> %c, <16 x i16> %d @@ -524,17 +386,11 @@ define <2 x i32> @select_v2i32(i1 zeroext %c, <2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: select_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, <2 x i32> %a, <2 x i32> %b ret <2 x i32> %v @@ -543,17 +399,13 @@ define <2 x i32> @selectcc_v2i32(i32 signext %a, i32 signext %b, <2 x i32> %c, <2 x i32> %d) { ; CHECK-LABEL: selectcc_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a2 -; CHECK-NEXT: vmv.v.x v26, a2 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i32 %a, %b %v = select i1 %cmp, <2 x i32> %c, <2 x i32> %d @@ -563,17 +415,11 @@ define <4 x i32> @select_v4i32(i1 zeroext %c, <4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: select_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB28_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB28_2: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, <4 x i32> %a, <4 x i32> %b ret <4 x i32> %v @@ -582,17 +428,13 @@ define <4 x i32> @selectcc_v4i32(i32 signext %a, i32 signext %b, <4 x i32> %c, <4 x i32> %d) { ; CHECK-LABEL: selectcc_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB29_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB29_2: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a2 -; CHECK-NEXT: vmv.v.x v26, a2 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli 
zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i32 %a, %b %v = select i1 %cmp, <4 x i32> %c, <4 x i32> %d @@ -602,17 +444,11 @@ define <8 x i32> @select_v8i32(i1 zeroext %c, <8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: select_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB30_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB30_2: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vand.vx v26, v8, a1 -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: vxor.vi v28, v28, -1 -; CHECK-NEXT: vand.vv v28, v10, v28 -; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, <8 x i32> %a, <8 x i32> %b ret <8 x i32> %v @@ -621,17 +457,13 @@ define <8 x i32> @selectcc_v8i32(i32 signext %a, i32 signext %b, <8 x i32> %c, <8 x i32> %d) { ; CHECK-LABEL: selectcc_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB31_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB31_2: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vand.vx v26, v8, a2 -; CHECK-NEXT: vmv.v.x v28, a2 -; CHECK-NEXT: vxor.vi v28, v28, -1 -; CHECK-NEXT: vand.vv v28, v10, v28 -; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i32 %a, %b %v = select i1 %cmp, <8 x i32> %c, <8 x i32> %d @@ -641,17 +473,11 @@ define <16 x i32> @select_v16i32(i1 zeroext %c, <16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: select_v16i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB32_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vand.vx v28, v8, a1 -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vxor.vi v8, v8, -1 -; CHECK-NEXT: vand.vv v8, v12, v8 -; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, <16 x i32> %a, <16 x i32> %b ret <16 x i32> %v @@ -660,17 +486,13 @@ define <16 x i32> @selectcc_v16i32(i32 signext %a, i32 signext %b, <16 x i32> %c, <16 x i32> %d) { ; CHECK-LABEL: selectcc_v16i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB33_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB33_2: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vand.vx v28, v8, a2 -; CHECK-NEXT: vmv.v.x v8, a2 -; CHECK-NEXT: vxor.vi v8, v8, -1 -; CHECK-NEXT: vand.vv v8, v12, v8 -; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i32 %a, %b %v = select i1 %cmp, <16 x i32> %c, <16 x i32> %d @@ 
-678,39 +500,14 @@ } define <2 x i64> @select_v2i64(i1 zeroext %c, <2 x i64> %a, <2 x i64> %b) { -; RV32-LABEL: select_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: addi a1, zero, -1 -; RV32-NEXT: bnez a0, .LBB34_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, zero -; RV32-NEXT: .LBB34_2: -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV32-NEXT: vmv.v.x v25, a1 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV32-NEXT: vand.vv v26, v8, v25 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV32-NEXT: vmv.v.i v27, -1 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV32-NEXT: vxor.vv v25, v25, v27 -; RV32-NEXT: vand.vv v25, v9, v25 -; RV32-NEXT: vor.vv v8, v26, v25 -; RV32-NEXT: ret -; -; RV64-LABEL: select_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: addi a1, zero, -1 -; RV64-NEXT: bnez a0, .LBB34_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a1, zero -; RV64-NEXT: .LBB34_2: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV64-NEXT: vand.vx v25, v8, a1 -; RV64-NEXT: vmv.v.x v26, a1 -; RV64-NEXT: vxor.vi v26, v26, -1 -; RV64-NEXT: vand.vv v26, v9, v26 -; RV64-NEXT: vor.vv v8, v25, v26 -; RV64-NEXT: ret +; CHECK-LABEL: select_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: ret %v = select i1 %c, <2 x i64> %a, <2 x i64> %b ret <2 x i64> %v } @@ -720,37 +517,24 @@ ; RV32: # %bb.0: ; RV32-NEXT: xor a1, a1, a3 ; RV32-NEXT: xor a0, a0, a2 -; RV32-NEXT: or a1, a0, a1 -; RV32-NEXT: addi a0, zero, -1 -; RV32-NEXT: bnez a1, .LBB35_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, zero -; RV32-NEXT: .LBB35_2: -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, mu ; RV32-NEXT: vmv.v.x v25, a0 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV32-NEXT: vand.vv v26, v8, v25 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV32-NEXT: vmv.v.i v27, -1 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV32-NEXT: vxor.vv v25, v25, v27 -; RV32-NEXT: vand.vv v25, v9, v25 -; RV32-NEXT: vor.vv v8, v26, v25 +; RV32-NEXT: vmsne.vi v0, v25, 0 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vmerge.vvm v8, v9, v8, v0 ; RV32-NEXT: ret ; ; RV64-LABEL: selectcc_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: addi a2, zero, -1 -; RV64-NEXT: bne a0, a1, .LBB35_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, zero -; RV64-NEXT: .LBB35_2: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV64-NEXT: vand.vx v25, v8, a2 -; RV64-NEXT: vmv.v.x v26, a2 -; RV64-NEXT: vxor.vi v26, v26, -1 -; RV64-NEXT: vand.vv v26, v9, v26 -; RV64-NEXT: vor.vv v8, v25, v26 +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vmsne.vi v0, v25, 0 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vmerge.vvm v8, v9, v8, v0 ; RV64-NEXT: ret %cmp = icmp ne i64 %a, %b %v = select i1 %cmp, <2 x i64> %c, <2 x i64> %d @@ -758,39 +542,14 @@ } define <4 x i64> @select_v4i64(i1 zeroext %c, <4 x i64> %a, <4 x i64> %b) { -; RV32-LABEL: select_v4i64: -; RV32: # %bb.0: -; RV32-NEXT: addi a1, zero, -1 -; RV32-NEXT: bnez a0, .LBB36_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, zero -; RV32-NEXT: .LBB36_2: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vmv.v.x v26, a1 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; RV32-NEXT: vand.vv v28, v8, v26 -; 
RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vmv.v.i v30, -1 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; RV32-NEXT: vxor.vv v26, v26, v30 -; RV32-NEXT: vand.vv v26, v10, v26 -; RV32-NEXT: vor.vv v8, v28, v26 -; RV32-NEXT: ret -; -; RV64-LABEL: select_v4i64: -; RV64: # %bb.0: -; RV64-NEXT: addi a1, zero, -1 -; RV64-NEXT: bnez a0, .LBB36_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a1, zero -; RV64-NEXT: .LBB36_2: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; RV64-NEXT: vand.vx v26, v8, a1 -; RV64-NEXT: vmv.v.x v28, a1 -; RV64-NEXT: vxor.vi v28, v28, -1 -; RV64-NEXT: vand.vv v28, v10, v28 -; RV64-NEXT: vor.vv v8, v26, v28 -; RV64-NEXT: ret +; CHECK-LABEL: select_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: ret %v = select i1 %c, <4 x i64> %a, <4 x i64> %b ret <4 x i64> %v } @@ -800,37 +559,24 @@ ; RV32: # %bb.0: ; RV32-NEXT: xor a1, a1, a3 ; RV32-NEXT: xor a0, a0, a2 -; RV32-NEXT: or a1, a0, a1 -; RV32-NEXT: addi a0, zero, -1 -; RV32-NEXT: bnez a1, .LBB37_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, zero -; RV32-NEXT: .LBB37_2: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vmv.v.x v26, a0 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; RV32-NEXT: vand.vv v28, v8, v26 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vmv.v.i v30, -1 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; RV32-NEXT: vxor.vv v26, v26, v30 -; RV32-NEXT: vand.vv v26, v10, v26 -; RV32-NEXT: vor.vv v8, v28, v26 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: vmsne.vi v0, v25, 0 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vmerge.vvm v8, v10, v8, v0 ; RV32-NEXT: ret ; ; RV64-LABEL: selectcc_v4i64: ; RV64: # %bb.0: -; RV64-NEXT: addi a2, zero, -1 -; RV64-NEXT: bne a0, a1, .LBB37_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, zero -; RV64-NEXT: .LBB37_2: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; RV64-NEXT: vand.vx v26, v8, a2 -; RV64-NEXT: vmv.v.x v28, a2 -; RV64-NEXT: vxor.vi v28, v28, -1 -; RV64-NEXT: vand.vv v28, v10, v28 -; RV64-NEXT: vor.vv v8, v26, v28 +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vmsne.vi v0, v25, 0 +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 ; RV64-NEXT: ret %cmp = icmp ne i64 %a, %b %v = select i1 %cmp, <4 x i64> %c, <4 x i64> %d @@ -838,39 +584,14 @@ } define <8 x i64> @select_v8i64(i1 zeroext %c, <8 x i64> %a, <8 x i64> %b) { -; RV32-LABEL: select_v8i64: -; RV32: # %bb.0: -; RV32-NEXT: addi a1, zero, -1 -; RV32-NEXT: bnez a0, .LBB38_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, zero -; RV32-NEXT: .LBB38_2: -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vmv.v.x v28, a1 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vand.vv v8, v8, v28 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vxor.vv v28, v28, v16 -; RV32-NEXT: vand.vv v28, v12, v28 -; RV32-NEXT: vor.vv v8, v8, v28 -; RV32-NEXT: ret -; -; RV64-LABEL: select_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: addi a1, zero, -1 -; RV64-NEXT: bnez a0, .LBB38_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a1, zero -; 
RV64-NEXT: .LBB38_2: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vand.vx v28, v8, a1 -; RV64-NEXT: vmv.v.x v8, a1 -; RV64-NEXT: vxor.vi v8, v8, -1 -; RV64-NEXT: vand.vv v8, v12, v8 -; RV64-NEXT: vor.vv v8, v28, v8 -; RV64-NEXT: ret +; CHECK-LABEL: select_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: ret %v = select i1 %c, <8 x i64> %a, <8 x i64> %b ret <8 x i64> %v } @@ -880,37 +601,24 @@ ; RV32: # %bb.0: ; RV32-NEXT: xor a1, a1, a3 ; RV32-NEXT: xor a0, a0, a2 -; RV32-NEXT: or a1, a0, a1 -; RV32-NEXT: addi a0, zero, -1 -; RV32-NEXT: bnez a1, .LBB39_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, zero -; RV32-NEXT: .LBB39_2: -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vmv.v.x v28, a0 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vand.vv v8, v8, v28 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vxor.vv v28, v28, v16 -; RV32-NEXT: vand.vv v28, v12, v28 -; RV32-NEXT: vor.vv v8, v8, v28 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: vmsne.vi v0, v25, 0 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vmerge.vvm v8, v12, v8, v0 ; RV32-NEXT: ret ; ; RV64-LABEL: selectcc_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: addi a2, zero, -1 -; RV64-NEXT: bne a0, a1, .LBB39_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, zero -; RV64-NEXT: .LBB39_2: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vand.vx v28, v8, a2 -; RV64-NEXT: vmv.v.x v8, a2 -; RV64-NEXT: vxor.vi v8, v8, -1 -; RV64-NEXT: vand.vv v8, v12, v8 -; RV64-NEXT: vor.vv v8, v28, v8 +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vmsne.vi v0, v25, 0 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vmerge.vvm v8, v12, v8, v0 ; RV64-NEXT: ret %cmp = icmp ne i64 %a, %b %v = select i1 %cmp, <8 x i64> %c, <8 x i64> %d @@ -918,40 +626,14 @@ } define <16 x i64> @select_v16i64(i1 zeroext %c, <16 x i64> %a, <16 x i64> %b) { -; RV32-LABEL: select_v16i64: -; RV32: # %bb.0: -; RV32-NEXT: addi a1, zero, -1 -; RV32-NEXT: bnez a0, .LBB40_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, zero -; RV32-NEXT: .LBB40_2: -; RV32-NEXT: addi a0, zero, 32 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; RV32-NEXT: vmv.v.x v24, a1 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; RV32-NEXT: vmv.v.i v0, -1 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: vxor.vv v24, v24, v0 -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: ret -; -; RV64-LABEL: select_v16i64: -; RV64: # %bb.0: -; RV64-NEXT: addi a1, zero, -1 -; RV64-NEXT: bnez a0, .LBB40_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a1, zero -; RV64-NEXT: .LBB40_2: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vmv.v.x v24, a1 -; RV64-NEXT: vxor.vi v24, v24, -1 -; RV64-NEXT: vand.vv v16, v16, v24 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: ret +; CHECK-LABEL: select_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; 
CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: ret %v = select i1 %c, <16 x i64> %a, <16 x i64> %b ret <16 x i64> %v } @@ -961,38 +643,24 @@ ; RV32: # %bb.0: ; RV32-NEXT: xor a1, a1, a3 ; RV32-NEXT: xor a0, a0, a2 -; RV32-NEXT: or a1, a0, a1 -; RV32-NEXT: addi a0, zero, -1 -; RV32-NEXT: bnez a1, .LBB41_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, zero -; RV32-NEXT: .LBB41_2: -; RV32-NEXT: addi a1, zero, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; RV32-NEXT: vmv.v.x v24, a0 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; RV32-NEXT: vmv.v.i v0, -1 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: vxor.vv v24, v24, v0 -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: vmsne.vi v0, v25, 0 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vmerge.vvm v8, v16, v8, v0 ; RV32-NEXT: ret ; ; RV64-LABEL: selectcc_v16i64: ; RV64: # %bb.0: -; RV64-NEXT: addi a2, zero, -1 -; RV64-NEXT: bne a0, a1, .LBB41_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, zero -; RV64-NEXT: .LBB41_2: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmv.v.x v24, a2 -; RV64-NEXT: vxor.vi v24, v24, -1 -; RV64-NEXT: vand.vv v16, v16, v24 -; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vmsne.vi v0, v25, 0 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vmerge.vvm v8, v16, v8, v0 ; RV64-NEXT: ret %cmp = icmp ne i64 %a, %b %v = select i1 %cmp, <16 x i64> %c, <16 x i64> %d diff --git a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll @@ -1,23 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: -verify-machineinstrs < %s | FileCheck %s define @select_nxv1f16(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB0_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -26,18 +20,12 @@ define @selectcc_nxv1f16(half %a, half %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: feq.h a1, fa0, fa1 -; CHECK-NEXT: addi a0, 
zero, -1 -; CHECK-NEXT: bnez a1, .LBB1_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a0 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: feq.h a0, fa0, fa1 +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = fcmp oeq half %a, %b %v = select i1 %cmp, %c, %d @@ -47,17 +35,11 @@ define @select_nxv2f16(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB2_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -66,18 +48,12 @@ define @selectcc_nxv2f16(half %a, half %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: feq.h a1, fa0, fa1 -; CHECK-NEXT: addi a0, zero, -1 -; CHECK-NEXT: bnez a1, .LBB3_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB3_2: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a0 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: feq.h a0, fa0, fa1 +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = fcmp oeq half %a, %b %v = select i1 %cmp, %c, %d @@ -87,17 +63,11 @@ define @select_nxv4f16(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB4_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -106,18 +76,12 @@ define @selectcc_nxv4f16(half %a, half %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: feq.h a1, fa0, fa1 -; CHECK-NEXT: addi a0, zero, -1 -; CHECK-NEXT: bnez a1, .LBB5_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB5_2: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a0 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: feq.h a0, fa0, fa1 +; CHECK-NEXT: vsetvli a1, 
zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = fcmp oeq half %a, %b %v = select i1 %cmp, %c, %d @@ -127,17 +91,11 @@ define @select_nxv8f16(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB6_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB6_2: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vand.vx v26, v8, a1 -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: vxor.vi v28, v28, -1 -; CHECK-NEXT: vand.vv v28, v10, v28 -; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -146,18 +104,12 @@ define @selectcc_nxv8f16(half %a, half %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: feq.h a1, fa0, fa1 -; CHECK-NEXT: addi a0, zero, -1 -; CHECK-NEXT: bnez a1, .LBB7_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu -; CHECK-NEXT: vand.vx v26, v8, a0 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vxor.vi v28, v28, -1 -; CHECK-NEXT: vand.vv v28, v10, v28 -; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: feq.h a0, fa0, fa1 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %cmp = fcmp oeq half %a, %b %v = select i1 %cmp, %c, %d @@ -167,17 +119,11 @@ define @select_nxv16f16(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB8_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB8_2: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vand.vx v28, v8, a1 -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vxor.vi v8, v8, -1 -; CHECK-NEXT: vand.vv v8, v12, v8 -; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vmsne.vi v0, v26, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -186,18 +132,12 @@ define @selectcc_nxv16f16(half %a, half %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: feq.h a1, fa0, fa1 -; CHECK-NEXT: addi a0, zero, -1 -; CHECK-NEXT: bnez a1, .LBB9_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB9_2: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu -; CHECK-NEXT: vand.vx v28, v8, a0 -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vxor.vi v8, v8, -1 -; CHECK-NEXT: vand.vv v8, v12, v8 -; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: feq.h a0, fa0, fa1 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vmsne.vi v0, v26, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: ret %cmp = fcmp oeq half %a, %b %v = select i1 %cmp, %c, %d @@ -207,17 +147,11 @@ define @select_nxv32f16(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 
-; CHECK-NEXT: bnez a0, .LBB10_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB10_2: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: vxor.vi v24, v24, -1 -; CHECK-NEXT: vand.vv v16, v16, v24 -; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu +; CHECK-NEXT: vmv.v.x v28, a0 +; CHECK-NEXT: vmsne.vi v0, v28, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -226,18 +160,12 @@ define @selectcc_nxv32f16(half %a, half %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: feq.h a1, fa0, fa1 -; CHECK-NEXT: addi a0, zero, -1 -; CHECK-NEXT: bnez a1, .LBB11_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB11_2: -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vxor.vi v24, v24, -1 -; CHECK-NEXT: vand.vv v16, v16, v24 -; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: feq.h a0, fa0, fa1 +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu +; CHECK-NEXT: vmv.v.x v28, a0 +; CHECK-NEXT: vmsne.vi v0, v28, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %cmp = fcmp oeq half %a, %b %v = select i1 %cmp, %c, %d @@ -247,17 +175,11 @@ define @select_nxv1f32(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB12_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB12_2: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -266,18 +188,12 @@ define @selectcc_nxv1f32(float %a, float %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: feq.s a1, fa0, fa1 -; CHECK-NEXT: addi a0, zero, -1 -; CHECK-NEXT: bnez a1, .LBB13_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB13_2: -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a0 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: feq.s a0, fa0, fa1 +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = fcmp oeq float %a, %b %v = select i1 %cmp, %c, %d @@ -287,17 +203,11 @@ define @select_nxv2f32(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB14_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB14_2: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; 
CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -306,18 +216,12 @@ define @selectcc_nxv2f32(float %a, float %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: feq.s a1, fa0, fa1 -; CHECK-NEXT: addi a0, zero, -1 -; CHECK-NEXT: bnez a1, .LBB15_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB15_2: -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a0 -; CHECK-NEXT: vmv.v.x v26, a0 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: feq.s a0, fa0, fa1 +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = fcmp oeq float %a, %b %v = select i1 %cmp, %c, %d @@ -327,17 +231,11 @@ define @select_nxv4f32(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB16_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB16_2: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vand.vx v26, v8, a1 -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: vxor.vi v28, v28, -1 -; CHECK-NEXT: vand.vv v28, v10, v28 -; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -346,18 +244,12 @@ define @selectcc_nxv4f32(float %a, float %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: feq.s a1, fa0, fa1 -; CHECK-NEXT: addi a0, zero, -1 -; CHECK-NEXT: bnez a1, .LBB17_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB17_2: -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu -; CHECK-NEXT: vand.vx v26, v8, a0 -; CHECK-NEXT: vmv.v.x v28, a0 -; CHECK-NEXT: vxor.vi v28, v28, -1 -; CHECK-NEXT: vand.vv v28, v10, v28 -; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: feq.s a0, fa0, fa1 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %cmp = fcmp oeq float %a, %b %v = select i1 %cmp, %c, %d @@ -367,17 +259,11 @@ define @select_nxv8f32(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB18_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB18_2: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vand.vx v28, v8, a1 -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vxor.vi v8, v8, -1 -; CHECK-NEXT: vand.vv v8, v12, v8 -; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -386,18 +272,12 @@ define @selectcc_nxv8f32(float %a, float %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: feq.s a1, fa0, fa1 -; CHECK-NEXT: addi 
a0, zero, -1
-; CHECK-NEXT:    bnez a1, .LBB19_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB19_2:
-; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vand.vx v28, v8, a0
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vxor.vi v8, v8, -1
-; CHECK-NEXT:    vand.vv v8, v12, v8
-; CHECK-NEXT:    vor.vv v8, v28, v8
+; CHECK-NEXT:    feq.s a0, fa0, fa1
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq float %a, %b
   %v = select i1 %cmp, <vscale x 8 x float> %c, <vscale x 8 x float> %d
@@ -407,17 +287,11 @@
 define <vscale x 16 x float> @select_nxv16f32(i1 zeroext %c, <vscale x 16 x float> %a, <vscale x 16 x float> %b) {
 ; CHECK-LABEL: select_nxv16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, zero, -1
-; CHECK-NEXT:    bnez a0, .LBB20_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a1, zero
-; CHECK-NEXT:  .LBB20_2:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vmv.v.x v24, a1
-; CHECK-NEXT:    vxor.vi v24, v24, -1
-; CHECK-NEXT:    vand.vv v16, v16, v24
-; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
+; CHECK-NEXT:    vmv.v.x v26, a0
+; CHECK-NEXT:    vmsne.vi v0, v26, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 16 x float> %a, <vscale x 16 x float> %b
   ret <vscale x 16 x float> %v
@@ -426,18 +300,12 @@
 define <vscale x 16 x float> @selectcc_nxv16f32(float %a, float %b, <vscale x 16 x float> %c, <vscale x 16 x float> %d) {
 ; CHECK-LABEL: selectcc_nxv16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    feq.s a1, fa0, fa1
-; CHECK-NEXT:    addi a0, zero, -1
-; CHECK-NEXT:    bnez a1, .LBB21_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:  .LBB21_2:
-; CHECK-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vand.vx v8, v8, a0
-; CHECK-NEXT:    vmv.v.x v24, a0
-; CHECK-NEXT:    vxor.vi v24, v24, -1
-; CHECK-NEXT:    vand.vv v16, v16, v24
-; CHECK-NEXT:    vor.vv v8, v8, v16
+; CHECK-NEXT:    feq.s a0, fa0, fa1
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
+; CHECK-NEXT:    vmv.v.x v26, a0
+; CHECK-NEXT:    vmsne.vi v0, v26, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT:    ret
   %cmp = fcmp oeq float %a, %b
   %v = select i1 %cmp, <vscale x 16 x float> %c, <vscale x 16 x float> %d
@@ -445,332 +313,112 @@
 }

 define <vscale x 1 x double> @select_nxv1f64(i1 zeroext %c, <vscale x 1 x double> %a, <vscale x 1 x double> %b) {
-; RV32-LABEL: select_nxv1f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    addi a1, zero, -1
-; RV32-NEXT:    bnez a0, .LBB22_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a1, zero
-; RV32-NEXT:  .LBB22_2:
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v25, (a0), zero
-; RV32-NEXT:    vand.vv v26, v8, v25
-; RV32-NEXT:    vxor.vi v25, v25, -1
-; RV32-NEXT:    vand.vv v25, v9, v25
-; RV32-NEXT:    vor.vv v8, v26, v25
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_nxv1f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi a1, zero, -1
-; RV64-NEXT:    bnez a0, .LBB22_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a1, zero
-; RV64-NEXT:  .LBB22_2:
-; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; RV64-NEXT:    vand.vx v25, v8, a1
-; RV64-NEXT:    vmv.v.x v26, a1
-; RV64-NEXT:    vxor.vi v26, v26, -1
-; RV64-NEXT:    vand.vv v26, v9, v26
-; RV64-NEXT:    vor.vv v8, v25, v26
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_nxv1f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 1 x double> %a, <vscale x 1 x double> %b
   ret <vscale x 1 x double> %v
 }

 define <vscale x 1 x double> @selectcc_nxv1f64(double %a, double %b, <vscale x 1 x double> %c, <vscale x 1 x double> %d) {
-; RV32-LABEL: selectcc_nxv1f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    feq.d a1, fa0, fa1
-; RV32-NEXT:    addi a0, zero, -1
-; RV32-NEXT:    bnez a1, .LBB23_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, zero
-; RV32-NEXT:  .LBB23_2:
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v25, (a0), zero
-; RV32-NEXT:    vand.vv v26, v8, v25
-; RV32-NEXT:    vxor.vi v25, v25, -1
-; RV32-NEXT:    vand.vv v25, v9, v25
-; RV32-NEXT:    vor.vv v8, v26, v25
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: selectcc_nxv1f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    feq.d a1, fa0, fa1
-; RV64-NEXT:    addi a0, zero, -1
-; RV64-NEXT:    bnez a1, .LBB23_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a0, zero
-; RV64-NEXT:  .LBB23_2:
-; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
-; RV64-NEXT:    vand.vx v25, v8, a0
-; RV64-NEXT:    vmv.v.x v26, a0
-; RV64-NEXT:    vxor.vi v26, v26, -1
-; RV64-NEXT:    vand.vv v26, v9, v26
-; RV64-NEXT:    vor.vv v8, v25, v26
-; RV64-NEXT:    ret
+; CHECK-LABEL: selectcc_nxv1f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    feq.d a0, fa0, fa1
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
   %cmp = fcmp oeq double %a, %b
   %v = select i1 %cmp, <vscale x 1 x double> %c, <vscale x 1 x double> %d
   ret <vscale x 1 x double> %v
 }

 define <vscale x 2 x double> @select_nxv2f64(i1 zeroext %c, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
-; RV32-LABEL: select_nxv2f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    addi a1, zero, -1
-; RV32-NEXT:    bnez a0, .LBB24_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a1, zero
-; RV32-NEXT:  .LBB24_2:
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v26, (a0), zero
-; RV32-NEXT:    vand.vv v28, v8, v26
-; RV32-NEXT:    vxor.vi v26, v26, -1
-; RV32-NEXT:    vand.vv v26, v10, v26
-; RV32-NEXT:    vor.vv v8, v28, v26
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: select_nxv2f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi a1, zero, -1
-; RV64-NEXT:    bnez a0, .LBB24_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a1, zero
-; RV64-NEXT:  .LBB24_2:
-; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; RV64-NEXT:    vand.vx v26, v8, a1
-; RV64-NEXT:    vmv.v.x v28, a1
-; RV64-NEXT:    vxor.vi v28, v28, -1
-; RV64-NEXT:    vand.vv v28, v10, v28
-; RV64-NEXT:    vor.vv v8, v26, v28
-; RV64-NEXT:    ret
+; CHECK-LABEL: select_nxv2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    ret
   %v = select i1 %c, <vscale x 2 x double> %a, <vscale x 2 x double> %b
   ret <vscale x 2 x double> %v
 }

 define <vscale x 2 x double> @selectcc_nxv2f64(double %a, double %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d) {
-; RV32-LABEL: selectcc_nxv2f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    feq.d a1, fa0, fa1
-; RV32-NEXT:    addi a0, zero, -1
-; RV32-NEXT:    bnez a1, .LBB25_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, zero
-; RV32-NEXT:  .LBB25_2:
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v26,
(a0), zero -; RV32-NEXT: vand.vv v28, v8, v26 -; RV32-NEXT: vxor.vi v26, v26, -1 -; RV32-NEXT: vand.vv v26, v10, v26 -; RV32-NEXT: vor.vv v8, v28, v26 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: selectcc_nxv2f64: -; RV64: # %bb.0: -; RV64-NEXT: feq.d a1, fa0, fa1 -; RV64-NEXT: addi a0, zero, -1 -; RV64-NEXT: bnez a1, .LBB25_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, zero -; RV64-NEXT: .LBB25_2: -; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu -; RV64-NEXT: vand.vx v26, v8, a0 -; RV64-NEXT: vmv.v.x v28, a0 -; RV64-NEXT: vxor.vi v28, v28, -1 -; RV64-NEXT: vand.vv v28, v10, v28 -; RV64-NEXT: vor.vv v8, v26, v28 -; RV64-NEXT: ret +; CHECK-LABEL: selectcc_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.d a0, fa0, fa1 +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: ret %cmp = fcmp oeq double %a, %b %v = select i1 %cmp, %c, %d ret %v } define @select_nxv4f64(i1 zeroext %c, %a, %b) { -; RV32-LABEL: select_nxv4f64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: addi a1, zero, -1 -; RV32-NEXT: bnez a0, .LBB26_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, zero -; RV32-NEXT: .LBB26_2: -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v28, (a0), zero -; RV32-NEXT: vand.vv v8, v8, v28 -; RV32-NEXT: vxor.vi v28, v28, -1 -; RV32-NEXT: vand.vv v28, v12, v28 -; RV32-NEXT: vor.vv v8, v8, v28 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: select_nxv4f64: -; RV64: # %bb.0: -; RV64-NEXT: addi a1, zero, -1 -; RV64-NEXT: bnez a0, .LBB26_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a1, zero -; RV64-NEXT: .LBB26_2: -; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; RV64-NEXT: vand.vx v28, v8, a1 -; RV64-NEXT: vmv.v.x v8, a1 -; RV64-NEXT: vxor.vi v8, v8, -1 -; RV64-NEXT: vand.vv v8, v12, v8 -; RV64-NEXT: vor.vv v8, v28, v8 -; RV64-NEXT: ret +; CHECK-LABEL: select_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v } define @selectcc_nxv4f64(double %a, double %b, %c, %d) { -; RV32-LABEL: selectcc_nxv4f64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: feq.d a1, fa0, fa1 -; RV32-NEXT: addi a0, zero, -1 -; RV32-NEXT: bnez a1, .LBB27_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, zero -; RV32-NEXT: .LBB27_2: -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v28, (a0), zero -; RV32-NEXT: vand.vv v8, v8, v28 -; RV32-NEXT: vxor.vi v28, v28, -1 -; RV32-NEXT: vand.vv v28, v12, v28 -; RV32-NEXT: vor.vv v8, v8, v28 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: selectcc_nxv4f64: -; RV64: # %bb.0: -; RV64-NEXT: feq.d a1, fa0, fa1 -; RV64-NEXT: addi a0, zero, -1 -; RV64-NEXT: bnez a1, .LBB27_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, zero -; RV64-NEXT: .LBB27_2: -; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu -; RV64-NEXT: vand.vx v28, v8, a0 -; RV64-NEXT: vmv.v.x v8, a0 -; RV64-NEXT: vxor.vi v8, v8, -1 -; RV64-NEXT: vand.vv v8, v12, v8 -; RV64-NEXT: vor.vv 
v8, v28, v8 -; RV64-NEXT: ret +; CHECK-LABEL: selectcc_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.d a0, fa0, fa1 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: ret %cmp = fcmp oeq double %a, %b %v = select i1 %cmp, %c, %d ret %v } define @select_nxv8f64(i1 zeroext %c, %a, %b) { -; RV32-LABEL: select_nxv8f64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: addi a1, zero, -1 -; RV32-NEXT: bnez a0, .LBB28_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, zero -; RV32-NEXT: .LBB28_2: -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vxor.vi v24, v24, -1 -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: select_nxv8f64: -; RV64: # %bb.0: -; RV64-NEXT: addi a1, zero, -1 -; RV64-NEXT: bnez a0, .LBB28_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a1, zero -; RV64-NEXT: .LBB28_2: -; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vmv.v.x v24, a1 -; RV64-NEXT: vxor.vi v24, v24, -1 -; RV64-NEXT: vand.vv v16, v16, v24 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: ret +; CHECK-LABEL: select_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v } define @selectcc_nxv8f64(double %a, double %b, %c, %d) { -; RV32-LABEL: selectcc_nxv8f64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: feq.d a1, fa0, fa1 -; RV32-NEXT: addi a0, zero, -1 -; RV32-NEXT: bnez a1, .LBB29_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, zero -; RV32-NEXT: .LBB29_2: -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vxor.vi v24, v24, -1 -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: selectcc_nxv8f64: -; RV64: # %bb.0: -; RV64-NEXT: feq.d a1, fa0, fa1 -; RV64-NEXT: addi a0, zero, -1 -; RV64-NEXT: bnez a1, .LBB29_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, zero -; RV64-NEXT: .LBB29_2: -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmv.v.x v24, a0 -; RV64-NEXT: vxor.vi v24, v24, -1 -; RV64-NEXT: vand.vv v16, v16, v24 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: ret +; CHECK-LABEL: selectcc_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.d a0, fa0, fa1 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: ret %cmp = fcmp oeq double %a, %b %v = select i1 %cmp, %c, %d ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/select-int.ll b/llvm/test/CodeGen/RISCV/rvv/select-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/select-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/select-int.ll @@ -7,13 +7,8 @@ 
define @select_nxv1i1(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv1i1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, 1 -; CHECK-NEXT: bnez a0, .LBB0_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu -; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vmsne.vi v25, v25, 0 ; CHECK-NEXT: vmandnot.mm v26, v8, v25 ; CHECK-NEXT: vmand.mm v25, v0, v25 @@ -27,12 +22,7 @@ ; CHECK-LABEL: selectcc_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: xor a0, a0, a1 -; CHECK-NEXT: andi a1, a0, 1 -; CHECK-NEXT: addi a0, zero, 1 -; CHECK-NEXT: bnez a1, .LBB1_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vmsne.vi v25, v25, 0 @@ -48,13 +38,8 @@ define @select_nxv2i1(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv2i1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, 1 -; CHECK-NEXT: bnez a0, .LBB2_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu -; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vmsne.vi v25, v25, 0 ; CHECK-NEXT: vmandnot.mm v26, v8, v25 ; CHECK-NEXT: vmand.mm v25, v0, v25 @@ -68,12 +53,7 @@ ; CHECK-LABEL: selectcc_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: xor a0, a0, a1 -; CHECK-NEXT: andi a1, a0, 1 -; CHECK-NEXT: addi a0, zero, 1 -; CHECK-NEXT: bnez a1, .LBB3_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vmsne.vi v25, v25, 0 @@ -89,13 +69,8 @@ define @select_nxv4i1(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv4i1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, 1 -; CHECK-NEXT: bnez a0, .LBB4_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu -; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vmsne.vi v25, v25, 0 ; CHECK-NEXT: vmandnot.mm v26, v8, v25 ; CHECK-NEXT: vmand.mm v25, v0, v25 @@ -109,12 +84,7 @@ ; CHECK-LABEL: selectcc_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: xor a0, a0, a1 -; CHECK-NEXT: andi a1, a0, 1 -; CHECK-NEXT: addi a0, zero, 1 -; CHECK-NEXT: bnez a1, .LBB5_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vmsne.vi v25, v25, 0 @@ -130,13 +100,8 @@ define @select_nxv8i1(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv8i1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, 1 -; CHECK-NEXT: bnez a0, .LBB6_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB6_2: -; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu -; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vmsne.vi v25, v25, 0 ; CHECK-NEXT: vmandnot.mm v26, v8, v25 ; CHECK-NEXT: vmand.mm v25, v0, v25 @@ -150,12 +115,7 @@ ; CHECK-LABEL: selectcc_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: xor a0, a0, a1 -; CHECK-NEXT: andi a1, a0, 1 -; CHECK-NEXT: addi a0, zero, 1 -; CHECK-NEXT: bnez a1, .LBB7_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; 
CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vmsne.vi v25, v25, 0 @@ -171,13 +131,8 @@ define @select_nxv16i1(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv16i1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, 1 -; CHECK-NEXT: bnez a0, .LBB8_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB8_2: -; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu -; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu +; CHECK-NEXT: vmv.v.x v26, a0 ; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vmandnot.mm v26, v8, v25 ; CHECK-NEXT: vmand.mm v25, v0, v25 @@ -191,12 +146,7 @@ ; CHECK-LABEL: selectcc_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: xor a0, a0, a1 -; CHECK-NEXT: andi a1, a0, 1 -; CHECK-NEXT: addi a0, zero, 1 -; CHECK-NEXT: bnez a1, .LBB9_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB9_2: +; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu ; CHECK-NEXT: vmv.v.x v26, a0 ; CHECK-NEXT: vmsne.vi v25, v26, 0 @@ -212,13 +162,8 @@ define @select_nxv32i1(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv32i1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, 1 -; CHECK-NEXT: bnez a0, .LBB10_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB10_2: -; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu -; CHECK-NEXT: vmv.v.x v28, a1 +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu +; CHECK-NEXT: vmv.v.x v28, a0 ; CHECK-NEXT: vmsne.vi v25, v28, 0 ; CHECK-NEXT: vmandnot.mm v26, v8, v25 ; CHECK-NEXT: vmand.mm v25, v0, v25 @@ -232,12 +177,7 @@ ; CHECK-LABEL: selectcc_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: xor a0, a0, a1 -; CHECK-NEXT: andi a1, a0, 1 -; CHECK-NEXT: addi a0, zero, 1 -; CHECK-NEXT: bnez a1, .LBB11_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu ; CHECK-NEXT: vmv.v.x v28, a0 ; CHECK-NEXT: vmsne.vi v25, v28, 0 @@ -253,13 +193,8 @@ define @select_nxv64i1(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv64i1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, 1 -; CHECK-NEXT: bnez a0, .LBB12_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB12_2: -; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu -; CHECK-NEXT: vmv.v.x v16, a1 +; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, mu +; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vmsne.vi v25, v16, 0 ; CHECK-NEXT: vmandnot.mm v26, v8, v25 ; CHECK-NEXT: vmand.mm v25, v0, v25 @@ -273,12 +208,7 @@ ; CHECK-LABEL: selectcc_nxv64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: xor a0, a0, a1 -; CHECK-NEXT: andi a1, a0, 1 -; CHECK-NEXT: addi a0, zero, 1 -; CHECK-NEXT: bnez a1, .LBB13_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, zero -; CHECK-NEXT: .LBB13_2: +; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, mu ; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vmsne.vi v25, v16, 0 @@ -294,17 +224,10 @@ define @select_nxv1i8(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB14_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB14_2: -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; 
CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -313,17 +236,12 @@ define @selectcc_nxv1i8(i8 signext %a, i8 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB15_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB15_2: -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a2 -; CHECK-NEXT: vmv.v.x v26, a2 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i8 %a, %b %v = select i1 %cmp, %c, %d @@ -333,17 +251,10 @@ define @select_nxv2i8(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB16_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB16_2: -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -352,17 +263,12 @@ define @selectcc_nxv2i8(i8 signext %a, i8 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB17_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB17_2: -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a2 -; CHECK-NEXT: vmv.v.x v26, a2 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i8 %a, %b %v = select i1 %cmp, %c, %d @@ -372,17 +278,10 @@ define @select_nxv4i8(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB18_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB18_2: -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -391,17 +290,12 @@ define @selectcc_nxv4i8(i8 signext %a, i8 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB19_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB19_2: -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a2 -; CHECK-NEXT: vmv.v.x v26, a2 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; 
CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i8 %a, %b %v = select i1 %cmp, %c, %d @@ -411,17 +305,10 @@ define @select_nxv8i8(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB20_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB20_2: -; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -430,17 +317,12 @@ define @selectcc_nxv8i8(i8 signext %a, i8 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB21_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB21_2: -; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a2 -; CHECK-NEXT: vmv.v.x v26, a2 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i8 %a, %b %v = select i1 %cmp, %c, %d @@ -450,17 +332,10 @@ define @select_nxv16i8(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB22_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB22_2: -; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu -; CHECK-NEXT: vand.vx v26, v8, a1 -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: vxor.vi v28, v28, -1 -; CHECK-NEXT: vand.vv v28, v10, v28 -; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vmsne.vi v0, v26, 0 +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -469,17 +344,12 @@ define @selectcc_nxv16i8(i8 signext %a, i8 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB23_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB23_2: -; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu -; CHECK-NEXT: vand.vx v26, v8, a2 -; CHECK-NEXT: vmv.v.x v28, a2 -; CHECK-NEXT: vxor.vi v28, v28, -1 -; CHECK-NEXT: vand.vv v28, v10, v28 -; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vmsne.vi v0, v26, 0 +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i8 %a, %b %v = select i1 %cmp, %c, %d @@ -489,17 +359,10 @@ define @select_nxv32i8(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv32i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB24_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB24_2: -; CHECK-NEXT: vsetvli a0, 
zero, e8, m4, ta, mu -; CHECK-NEXT: vand.vx v28, v8, a1 -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vxor.vi v8, v8, -1 -; CHECK-NEXT: vand.vv v8, v12, v8 -; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu +; CHECK-NEXT: vmv.v.x v28, a0 +; CHECK-NEXT: vmsne.vi v0, v28, 0 +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -508,17 +371,12 @@ define @selectcc_nxv32i8(i8 signext %a, i8 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv32i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB25_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB25_2: -; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu -; CHECK-NEXT: vand.vx v28, v8, a2 -; CHECK-NEXT: vmv.v.x v8, a2 -; CHECK-NEXT: vxor.vi v8, v8, -1 -; CHECK-NEXT: vand.vv v8, v12, v8 -; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu +; CHECK-NEXT: vmv.v.x v28, a0 +; CHECK-NEXT: vmsne.vi v0, v28, 0 +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i8 %a, %b %v = select i1 %cmp, %c, %d @@ -528,17 +386,10 @@ define @select_nxv64i8(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: vxor.vi v24, v24, -1 -; CHECK-NEXT: vand.vv v16, v16, v24 -; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, mu +; CHECK-NEXT: vmv.v.x v24, a0 +; CHECK-NEXT: vmsne.vi v0, v24, 0 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -547,17 +398,12 @@ define @selectcc_nxv64i8(i8 signext %a, i8 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB27_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu -; CHECK-NEXT: vand.vx v8, v8, a2 -; CHECK-NEXT: vmv.v.x v24, a2 -; CHECK-NEXT: vxor.vi v24, v24, -1 -; CHECK-NEXT: vand.vv v16, v16, v24 -; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, mu +; CHECK-NEXT: vmv.v.x v24, a0 +; CHECK-NEXT: vmsne.vi v0, v24, 0 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i8 %a, %b %v = select i1 %cmp, %c, %d @@ -567,17 +413,11 @@ define @select_nxv1i16(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB28_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB28_2: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -586,17 +426,13 @@ define @selectcc_nxv1i16(i16 signext %a, i16 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: 
addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB29_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB29_2: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a2 -; CHECK-NEXT: vmv.v.x v26, a2 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i16 %a, %b %v = select i1 %cmp, %c, %d @@ -606,17 +442,11 @@ define @select_nxv2i16(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB30_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB30_2: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -625,17 +455,13 @@ define @selectcc_nxv2i16(i16 signext %a, i16 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB31_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB31_2: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a2 -; CHECK-NEXT: vmv.v.x v26, a2 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i16 %a, %b %v = select i1 %cmp, %c, %d @@ -645,17 +471,11 @@ define @select_nxv4i16(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB32_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -664,17 +484,13 @@ define @selectcc_nxv4i16(i16 signext %a, i16 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB33_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB33_2: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a2 -; CHECK-NEXT: vmv.v.x v26, a2 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: xor a0, a0, 
a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i16 %a, %b %v = select i1 %cmp, %c, %d @@ -684,17 +500,11 @@ define @select_nxv8i16(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB34_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB34_2: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vand.vx v26, v8, a1 -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: vxor.vi v28, v28, -1 -; CHECK-NEXT: vand.vv v28, v10, v28 -; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -703,17 +513,13 @@ define @selectcc_nxv8i16(i16 signext %a, i16 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB35_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB35_2: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vand.vx v26, v8, a2 -; CHECK-NEXT: vmv.v.x v28, a2 -; CHECK-NEXT: vxor.vi v28, v28, -1 -; CHECK-NEXT: vand.vv v28, v10, v28 -; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i16 %a, %b %v = select i1 %cmp, %c, %d @@ -723,17 +529,11 @@ define @select_nxv16i16(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB36_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB36_2: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vand.vx v28, v8, a1 -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vxor.vi v8, v8, -1 -; CHECK-NEXT: vand.vv v8, v12, v8 -; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vmsne.vi v0, v26, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -742,17 +542,13 @@ define @selectcc_nxv16i16(i16 signext %a, i16 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB37_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB37_2: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vand.vx v28, v8, a2 -; CHECK-NEXT: vmv.v.x v8, a2 -; CHECK-NEXT: vxor.vi v8, v8, -1 -; CHECK-NEXT: vand.vv v8, v12, v8 -; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vmsne.vi v0, v26, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i16 %a, %b %v = select i1 %cmp, %c, %d @@ -762,17 +558,11 @@ define @select_nxv32i16(i1 zeroext %c, %a, %b) { ; 
CHECK-LABEL: select_nxv32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB38_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB38_2: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: vxor.vi v24, v24, -1 -; CHECK-NEXT: vand.vv v16, v16, v24 -; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu +; CHECK-NEXT: vmv.v.x v28, a0 +; CHECK-NEXT: vmsne.vi v0, v28, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -781,17 +571,13 @@ define @selectcc_nxv32i16(i16 signext %a, i16 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB39_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB39_2: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vand.vx v8, v8, a2 -; CHECK-NEXT: vmv.v.x v24, a2 -; CHECK-NEXT: vxor.vi v24, v24, -1 -; CHECK-NEXT: vand.vv v16, v16, v24 -; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu +; CHECK-NEXT: vmv.v.x v28, a0 +; CHECK-NEXT: vmsne.vi v0, v28, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i16 %a, %b %v = select i1 %cmp, %c, %d @@ -801,17 +587,11 @@ define @select_nxv1i32(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB40_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB40_2: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -820,17 +600,13 @@ define @selectcc_nxv1i32(i32 signext %a, i32 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB41_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB41_2: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a2 -; CHECK-NEXT: vmv.v.x v26, a2 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i32 %a, %b %v = select i1 %cmp, %c, %d @@ -840,17 +616,11 @@ define @select_nxv2i32(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB42_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB42_2: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a1 -; CHECK-NEXT: vmv.v.x v26, a1 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; 
CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -859,17 +629,13 @@ define @selectcc_nxv2i32(i32 signext %a, i32 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB43_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB43_2: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vand.vx v25, v8, a2 -; CHECK-NEXT: vmv.v.x v26, a2 -; CHECK-NEXT: vxor.vi v26, v26, -1 -; CHECK-NEXT: vand.vv v26, v9, v26 -; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i32 %a, %b %v = select i1 %cmp, %c, %d @@ -879,17 +645,11 @@ define @select_nxv4i32(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB44_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vand.vx v26, v8, a1 -; CHECK-NEXT: vmv.v.x v28, a1 -; CHECK-NEXT: vxor.vi v28, v28, -1 -; CHECK-NEXT: vand.vv v28, v10, v28 -; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -898,17 +658,13 @@ define @selectcc_nxv4i32(i32 signext %a, i32 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB45_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB45_2: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vand.vx v26, v8, a2 -; CHECK-NEXT: vmv.v.x v28, a2 -; CHECK-NEXT: vxor.vi v28, v28, -1 -; CHECK-NEXT: vand.vv v28, v10, v28 -; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i32 %a, %b %v = select i1 %cmp, %c, %d @@ -918,17 +674,11 @@ define @select_nxv8i32(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB46_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB46_2: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vand.vx v28, v8, a1 -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vxor.vi v8, v8, -1 -; CHECK-NEXT: vand.vv v8, v12, v8 -; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -937,17 +687,13 @@ define @selectcc_nxv8i32(i32 signext %a, i32 signext %b, %c, %d) { ; 
CHECK-LABEL: selectcc_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB47_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB47_2: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vand.vx v28, v8, a2 -; CHECK-NEXT: vmv.v.x v8, a2 -; CHECK-NEXT: vxor.vi v8, v8, -1 -; CHECK-NEXT: vand.vv v8, v12, v8 -; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i32 %a, %b %v = select i1 %cmp, %c, %d @@ -957,17 +703,11 @@ define @select_nxv16i32(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv16i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, zero, -1 -; CHECK-NEXT: bnez a0, .LBB48_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, zero -; CHECK-NEXT: .LBB48_2: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: vxor.vi v24, v24, -1 -; CHECK-NEXT: vand.vv v16, v16, v24 -; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vmsne.vi v0, v26, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -976,17 +716,13 @@ define @selectcc_nxv16i32(i32 signext %a, i32 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv16i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a2, zero, -1 -; CHECK-NEXT: bne a0, a1, .LBB49_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, zero -; CHECK-NEXT: .LBB49_2: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vand.vx v8, v8, a2 -; CHECK-NEXT: vmv.v.x v24, a2 -; CHECK-NEXT: vxor.vi v24, v24, -1 -; CHECK-NEXT: vand.vv v16, v16, v24 -; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vmsne.vi v0, v26, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret %cmp = icmp ne i32 %a, %b %v = select i1 %cmp, %c, %d @@ -994,41 +730,14 @@ } define @select_nxv1i64(i1 zeroext %c, %a, %b) { -; RV32-LABEL: select_nxv1i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: addi a1, zero, -1 -; RV32-NEXT: bnez a0, .LBB50_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, zero -; RV32-NEXT: .LBB50_2: -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: vand.vv v26, v8, v25 -; RV32-NEXT: vxor.vi v25, v25, -1 -; RV32-NEXT: vand.vv v25, v9, v25 -; RV32-NEXT: vor.vv v8, v26, v25 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: select_nxv1i64: -; RV64: # %bb.0: -; RV64-NEXT: addi a1, zero, -1 -; RV64-NEXT: bnez a0, .LBB50_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a1, zero -; RV64-NEXT: .LBB50_2: -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; RV64-NEXT: vand.vx v25, v8, a1 -; RV64-NEXT: vmv.v.x v26, a1 -; RV64-NEXT: vxor.vi v26, v26, -1 -; RV64-NEXT: vand.vv v26, v9, v26 -; RV64-NEXT: vor.vv v8, v25, v26 -; RV64-NEXT: ret +; CHECK-LABEL: select_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: 
vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v } @@ -1036,41 +745,26 @@ define @selectcc_nxv1i64(i64 signext %a, i64 signext %b, %c, %d) { ; RV32-LABEL: selectcc_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: xor a1, a1, a3 ; RV32-NEXT: xor a0, a0, a2 -; RV32-NEXT: or a1, a0, a1 -; RV32-NEXT: addi a0, zero, -1 -; RV32-NEXT: bnez a1, .LBB51_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, zero -; RV32-NEXT: .LBB51_2: -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: vand.vv v26, v8, v25 -; RV32-NEXT: vxor.vi v25, v25, -1 -; RV32-NEXT: vand.vv v25, v9, v25 -; RV32-NEXT: vor.vv v8, v26, v25 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: vmsne.vi v0, v25, 0 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV32-NEXT: vmerge.vvm v8, v9, v8, v0 ; RV32-NEXT: ret ; ; RV64-LABEL: selectcc_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: addi a2, zero, -1 -; RV64-NEXT: bne a0, a1, .LBB51_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, zero -; RV64-NEXT: .LBB51_2: -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; RV64-NEXT: vand.vx v25, v8, a2 -; RV64-NEXT: vmv.v.x v26, a2 -; RV64-NEXT: vxor.vi v26, v26, -1 -; RV64-NEXT: vand.vv v26, v9, v26 -; RV64-NEXT: vor.vv v8, v25, v26 +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vmsne.vi v0, v25, 0 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; RV64-NEXT: vmerge.vvm v8, v9, v8, v0 ; RV64-NEXT: ret %cmp = icmp ne i64 %a, %b %v = select i1 %cmp, %c, %d @@ -1078,41 +772,14 @@ } define @select_nxv2i64(i1 zeroext %c, %a, %b) { -; RV32-LABEL: select_nxv2i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: addi a1, zero, -1 -; RV32-NEXT: bnez a0, .LBB52_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, zero -; RV32-NEXT: .LBB52_2: -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: vand.vv v28, v8, v26 -; RV32-NEXT: vxor.vi v26, v26, -1 -; RV32-NEXT: vand.vv v26, v10, v26 -; RV32-NEXT: vor.vv v8, v28, v26 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: select_nxv2i64: -; RV64: # %bb.0: -; RV64-NEXT: addi a1, zero, -1 -; RV64-NEXT: bnez a0, .LBB52_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a1, zero -; RV64-NEXT: .LBB52_2: -; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; RV64-NEXT: vand.vx v26, v8, a1 -; RV64-NEXT: vmv.v.x v28, a1 -; RV64-NEXT: vxor.vi v28, v28, -1 -; RV64-NEXT: vand.vv v28, v10, v28 -; RV64-NEXT: vor.vv v8, v26, v28 -; RV64-NEXT: ret +; CHECK-LABEL: select_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v } @@ -1120,41 +787,26 @@ define @selectcc_nxv2i64(i64 signext %a, i64 signext %b, %c, %d) { ; RV32-LABEL: selectcc_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi 
sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: xor a1, a1, a3 ; RV32-NEXT: xor a0, a0, a2 -; RV32-NEXT: or a1, a0, a1 -; RV32-NEXT: addi a0, zero, -1 -; RV32-NEXT: bnez a1, .LBB53_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, zero -; RV32-NEXT: .LBB53_2: -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: vand.vv v28, v8, v26 -; RV32-NEXT: vxor.vi v26, v26, -1 -; RV32-NEXT: vand.vv v26, v10, v26 -; RV32-NEXT: vor.vv v8, v28, v26 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: vmsne.vi v0, v25, 0 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-NEXT: vmerge.vvm v8, v10, v8, v0 ; RV32-NEXT: ret ; ; RV64-LABEL: selectcc_nxv2i64: ; RV64: # %bb.0: -; RV64-NEXT: addi a2, zero, -1 -; RV64-NEXT: bne a0, a1, .LBB53_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, zero -; RV64-NEXT: .LBB53_2: -; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; RV64-NEXT: vand.vx v26, v8, a2 -; RV64-NEXT: vmv.v.x v28, a2 -; RV64-NEXT: vxor.vi v28, v28, -1 -; RV64-NEXT: vand.vv v28, v10, v28 -; RV64-NEXT: vor.vv v8, v26, v28 +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vmsne.vi v0, v25, 0 +; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 ; RV64-NEXT: ret %cmp = icmp ne i64 %a, %b %v = select i1 %cmp, %c, %d @@ -1162,41 +814,14 @@ } define @select_nxv4i64(i1 zeroext %c, %a, %b) { -; RV32-LABEL: select_nxv4i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: addi a1, zero, -1 -; RV32-NEXT: bnez a0, .LBB54_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, zero -; RV32-NEXT: .LBB54_2: -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v28, (a0), zero -; RV32-NEXT: vand.vv v8, v8, v28 -; RV32-NEXT: vxor.vi v28, v28, -1 -; RV32-NEXT: vand.vv v28, v12, v28 -; RV32-NEXT: vor.vv v8, v8, v28 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: select_nxv4i64: -; RV64: # %bb.0: -; RV64-NEXT: addi a1, zero, -1 -; RV64-NEXT: bnez a0, .LBB54_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a1, zero -; RV64-NEXT: .LBB54_2: -; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; RV64-NEXT: vand.vx v28, v8, a1 -; RV64-NEXT: vmv.v.x v8, a1 -; RV64-NEXT: vxor.vi v8, v8, -1 -; RV64-NEXT: vand.vv v8, v12, v8 -; RV64-NEXT: vor.vv v8, v28, v8 -; RV64-NEXT: ret +; CHECK-LABEL: select_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v } @@ -1204,41 +829,26 @@ define @selectcc_nxv4i64(i64 signext %a, i64 signext %b, %c, %d) { ; RV32-LABEL: selectcc_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: xor a1, a1, a3 ; RV32-NEXT: xor a0, a0, a2 -; RV32-NEXT: or a1, a0, a1 -; RV32-NEXT: addi a0, zero, -1 -; RV32-NEXT: bnez a1, .LBB55_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, zero -; RV32-NEXT: .LBB55_2: -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, 
e64, m4, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v28, (a0), zero -; RV32-NEXT: vand.vv v8, v8, v28 -; RV32-NEXT: vxor.vi v28, v28, -1 -; RV32-NEXT: vand.vv v28, v12, v28 -; RV32-NEXT: vor.vv v8, v8, v28 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: vmsne.vi v0, v25, 0 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vmerge.vvm v8, v12, v8, v0 ; RV32-NEXT: ret ; ; RV64-LABEL: selectcc_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: addi a2, zero, -1 -; RV64-NEXT: bne a0, a1, .LBB55_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, zero -; RV64-NEXT: .LBB55_2: -; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; RV64-NEXT: vand.vx v28, v8, a2 -; RV64-NEXT: vmv.v.x v8, a2 -; RV64-NEXT: vxor.vi v8, v8, -1 -; RV64-NEXT: vand.vv v8, v12, v8 -; RV64-NEXT: vor.vv v8, v28, v8 +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vmsne.vi v0, v25, 0 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64-NEXT: vmerge.vvm v8, v12, v8, v0 ; RV64-NEXT: ret %cmp = icmp ne i64 %a, %b %v = select i1 %cmp, %c, %d @@ -1246,41 +856,14 @@ } define @select_nxv8i64(i1 zeroext %c, %a, %b) { -; RV32-LABEL: select_nxv8i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: addi a1, zero, -1 -; RV32-NEXT: bnez a0, .LBB56_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, zero -; RV32-NEXT: .LBB56_2: -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vxor.vi v24, v24, -1 -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: select_nxv8i64: -; RV64: # %bb.0: -; RV64-NEXT: addi a1, zero, -1 -; RV64-NEXT: bnez a0, .LBB56_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a1, zero -; RV64-NEXT: .LBB56_2: -; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vmv.v.x v24, a1 -; RV64-NEXT: vxor.vi v24, v24, -1 -; RV64-NEXT: vand.vv v16, v16, v24 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: ret +; CHECK-LABEL: select_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v } @@ -1288,41 +871,26 @@ define @selectcc_nxv8i64(i64 signext %a, i64 signext %b, %c, %d) { ; RV32-LABEL: selectcc_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: xor a1, a1, a3 ; RV32-NEXT: xor a0, a0, a2 -; RV32-NEXT: or a1, a0, a1 -; RV32-NEXT: addi a0, zero, -1 -; RV32-NEXT: bnez a1, .LBB57_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, zero -; RV32-NEXT: .LBB57_2: -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vxor.vi v24, v24, -1 -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; 
RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: vmsne.vi v0, v25, 0 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV32-NEXT: vmerge.vvm v8, v16, v8, v0 ; RV32-NEXT: ret ; ; RV64-LABEL: selectcc_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: addi a2, zero, -1 -; RV64-NEXT: bne a0, a1, .LBB57_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, zero -; RV64-NEXT: .LBB57_2: -; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmv.v.x v24, a2 -; RV64-NEXT: vxor.vi v24, v24, -1 -; RV64-NEXT: vand.vv v16, v16, v24 -; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; RV64-NEXT: vmv.v.x v25, a0 +; RV64-NEXT: vmsne.vi v0, v25, 0 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vmerge.vvm v8, v16, v8, v0 ; RV64-NEXT: ret %cmp = icmp ne i64 %a, %b %v = select i1 %cmp, %c, %d