diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -924,11 +924,16 @@
   // AND,OR,XOR, we will have to scalarize the op.
   // Notice that the operation may be 'promoted' which means that it is
   // 'bitcasted' to another type which is handled.
-  // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
+  // Also, we need to be able to construct a splat vector using either
+  // BUILD_VECTOR or SPLAT_VECTOR.
+  // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
+  // BUILD_VECTOR?
   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
-      TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
-      TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand)
+      TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(VT.isFixedLengthVector() ? ISD::BUILD_VECTOR
+                                                      : ISD::SPLAT_VECTOR,
+                             VT) == TargetLowering::Expand)
     return DAG.UnrollVectorOp(Node);
 
   // Generate a mask operand.
@@ -942,8 +947,11 @@
                       BitTy),
       DAG.getConstant(0, DL, BitTy));
 
-  // Broadcast the mask so that the entire vector is all-one or all zero.
-  Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
+  // Broadcast the mask so that the entire vector is all one or all zero.
+  if (VT.isFixedLengthVector())
+    Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
+  else
+    Mask = DAG.getSplatVector(MaskTy, DL, Mask);
 
   // Bitcast the operands to be the same type as the mask.
   // This is needed when we select between FP types because
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -442,6 +442,9 @@
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 
+      setOperationAction(ISD::SELECT, VT, Expand);
+      setOperationAction(ISD::SELECT_CC, VT, Expand);
+
       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
       setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
@@ -517,6 +520,9 @@
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
 
+      setOperationAction(ISD::SELECT, VT, Expand);
+      setOperationAction(ISD::SELECT_CC, VT, Expand);
+
       setOperationAction(ISD::STEP_VECTOR, VT, Custom);
       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
 
@@ -571,6 +577,9 @@
       setOperationAction(ISD::MGATHER, VT, Custom);
       setOperationAction(ISD::MSCATTER, VT, Custom);
 
+      setOperationAction(ISD::SELECT, VT, Expand);
+      setOperationAction(ISD::SELECT_CC, VT, Expand);
+
       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
@@ -695,6 +704,8 @@
       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
 
       setOperationAction(ISD::VSELECT, VT, Custom);
+      setOperationAction(ISD::SELECT, VT, Expand);
+      setOperationAction(ISD::SELECT_CC, VT, Expand);
 
       setOperationAction(ISD::ANY_EXTEND, VT, Custom);
       setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
@@ -762,6 +773,8 @@
         setCondCodeAction(CC, VT, Expand);
 
       setOperationAction(ISD::VSELECT, VT, Custom);
+      setOperationAction(ISD::SELECT, VT, Expand);
+      setOperationAction(ISD::SELECT_CC, VT, Expand);
 
       setOperationAction(ISD::BITCAST, VT, Custom);
 
diff
--git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll @@ -0,0 +1,3752 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +define <2 x half> @select_v2f16(i1 zeroext %c, <2 x half> %a, <2 x half> %b) { +; CHECK-LABEL: select_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft1, v9 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vfmv.f.s ft3, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: vfmv.f.s ft2, v25 +; CHECK-NEXT: bnez a0, .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fmv.h ft0, ft1 +; CHECK-NEXT: fmv.h ft2, ft3 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v8, ft2 +; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: ret + %v = select i1 %c, <2 x half> %a, <2 x half> %b + ret <2 x half> %v +} + +define <2 x half> @selectcc_v2f16(half %a, half %b, <2 x half> %c, <2 x half> %d) { +; CHECK-LABEL: selectcc_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.h a0, fa0, fa1 +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: bnez a0, .LBB1_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fmv.h ft0, ft1 +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: vsetivli a1, 2, e16,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, ft0 +; CHECK-NEXT: vfmv.f.s ft1, v9 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: bnez a0, .LBB1_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: fmv.h ft0, ft1 +; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; CHECK-NEXT: vfmv.s.f v25, ft0 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret + %cmp = fcmp oeq half %a, %b + %v = select i1 %cmp, <2 x half> %c, <2 x half> %d + ret <2 x half> %v +} + +define <4 x half> @select_v4f16(i1 zeroext %c, <4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: select_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft1, v9 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: bnez a0, .LBB2_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fmv.h ft0, ft1 +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: fsh ft0, 8(sp) +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 3 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 3 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB2_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: fsh ft1, 14(sp) +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 2 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB2_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB2_6: +; CHECK-NEXT: fsh ft1, 12(sp) +; CHECK-NEXT: vsetivli a1, 1, 
e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB2_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB2_8: +; CHECK-NEXT: fsh ft1, 10(sp) +; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = select i1 %c, <4 x half> %a, <4 x half> %b + ret <4 x half> %v +} + +define <4 x half> @selectcc_v4f16(half %a, half %b, <4 x half> %c, <4 x half> %d) { +; CHECK-LABEL: selectcc_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: feq.h a0, fa0, fa1 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft1, v9 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: bnez a0, .LBB3_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fmv.h ft0, ft1 +; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: fsh ft0, 8(sp) +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 3 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 3 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB3_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB3_4: +; CHECK-NEXT: fsh ft1, 14(sp) +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 2 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB3_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB3_6: +; CHECK-NEXT: fsh ft1, 12(sp) +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB3_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB3_8: +; CHECK-NEXT: fsh ft1, 10(sp) +; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; CHECK-NEXT: addi a0, sp, 8 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %cmp = fcmp oeq half %a, %b + %v = select i1 %cmp, <4 x half> %c, <4 x half> %d + ret <4 x half> %v +} + +define <8 x half> @select_v8f16(i1 zeroext %c, <8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: select_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft1, v9 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: bnez a0, .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fmv.h ft0, ft1 +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: fsh ft0, 0(sp) +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 7 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 7 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB4_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB4_4: +; CHECK-NEXT: fsh ft1, 14(sp) +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 6 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 6 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB4_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB4_6: +; CHECK-NEXT: fsh ft1, 12(sp) +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 5 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 5 +; 
CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB4_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB4_8: +; CHECK-NEXT: fsh ft1, 10(sp) +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 4 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 4 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB4_10 +; CHECK-NEXT: # %bb.9: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB4_10: +; CHECK-NEXT: fsh ft1, 8(sp) +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 3 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 3 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB4_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB4_12: +; CHECK-NEXT: fsh ft1, 6(sp) +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 2 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB4_14 +; CHECK-NEXT: # %bb.13: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB4_14: +; CHECK-NEXT: fsh ft1, 4(sp) +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB4_16 +; CHECK-NEXT: # %bb.15: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB4_16: +; CHECK-NEXT: fsh ft1, 2(sp) +; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v8, (sp) +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = select i1 %c, <8 x half> %a, <8 x half> %b + ret <8 x half> %v +} + +define <8 x half> @selectcc_v8f16(half %a, half %b, <8 x half> %c, <8 x half> %d) { +; CHECK-LABEL: selectcc_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: feq.h a0, fa0, fa1 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft1, v9 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: bnez a0, .LBB5_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fmv.h ft0, ft1 +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: fsh ft0, 0(sp) +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 7 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 7 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB5_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB5_4: +; CHECK-NEXT: fsh ft1, 14(sp) +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 6 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 6 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB5_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB5_6: +; CHECK-NEXT: fsh ft1, 12(sp) +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 5 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 5 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB5_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB5_8: +; CHECK-NEXT: fsh ft1, 10(sp) +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 4 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 4 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB5_10 +; CHECK-NEXT: # %bb.9: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB5_10: +; CHECK-NEXT: fsh ft1, 8(sp) +; CHECK-NEXT: vsetivli a1, 1, 
e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 3 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 3 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB5_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB5_12: +; CHECK-NEXT: fsh ft1, 6(sp) +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 2 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB5_14 +; CHECK-NEXT: # %bb.13: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB5_14: +; CHECK-NEXT: fsh ft1, 4(sp) +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB5_16 +; CHECK-NEXT: # %bb.15: +; CHECK-NEXT: fmv.h ft1, ft0 +; CHECK-NEXT: .LBB5_16: +; CHECK-NEXT: fsh ft1, 2(sp) +; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v8, (sp) +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %cmp = fcmp oeq half %a, %b + %v = select i1 %cmp, <8 x half> %c, <8 x half> %d + ret <8 x half> %v +} + +define <16 x half> @select_v16f16(i1 zeroext %c, <16 x half> %a, <16 x half> %b) { +; RV32-LABEL: select_v16f16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -64 +; RV32-NEXT: .cfi_def_cfa_offset 64 +; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: addi s0, sp, 64 +; RV32-NEXT: .cfi_def_cfa s0, 0 +; RV32-NEXT: andi sp, sp, -32 +; RV32-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft1, v10 +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: bnez a0, .LBB6_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: fmv.h ft0, ft1 +; RV32-NEXT: .LBB6_2: +; RV32-NEXT: fsh ft0, 0(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 15 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 15 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB6_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB6_4: +; RV32-NEXT: fsh ft1, 30(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 14 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 14 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB6_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB6_6: +; RV32-NEXT: fsh ft1, 28(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 13 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 13 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB6_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB6_8: +; RV32-NEXT: fsh ft1, 26(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 12 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 12 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB6_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB6_10: +; RV32-NEXT: fsh ft1, 24(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 11 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 11 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB6_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB6_12: +; RV32-NEXT: fsh ft1, 22(sp) +; RV32-NEXT: vsetivli 
a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 10 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 10 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB6_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB6_14: +; RV32-NEXT: fsh ft1, 20(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 9 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 9 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB6_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB6_16: +; RV32-NEXT: fsh ft1, 18(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 8 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 8 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB6_18 +; RV32-NEXT: # %bb.17: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB6_18: +; RV32-NEXT: fsh ft1, 16(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 7 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 7 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB6_20 +; RV32-NEXT: # %bb.19: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB6_20: +; RV32-NEXT: fsh ft1, 14(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 6 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 6 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB6_22 +; RV32-NEXT: # %bb.21: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB6_22: +; RV32-NEXT: fsh ft1, 12(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 5 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 5 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB6_24 +; RV32-NEXT: # %bb.23: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB6_24: +; RV32-NEXT: fsh ft1, 10(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 4 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 4 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB6_26 +; RV32-NEXT: # %bb.25: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB6_26: +; RV32-NEXT: fsh ft1, 8(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 3 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 3 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB6_28 +; RV32-NEXT: # %bb.27: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB6_28: +; RV32-NEXT: fsh ft1, 6(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 2 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 2 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB6_30 +; RV32-NEXT: # %bb.29: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB6_30: +; RV32-NEXT: fsh ft1, 4(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 1 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB6_32 +; RV32-NEXT: # %bb.31: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB6_32: +; RV32-NEXT: fsh ft1, 2(sp) +; RV32-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; RV32-NEXT: vle16.v v8, (sp) +; RV32-NEXT: addi sp, s0, -64 +; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 64 +; RV32-NEXT: ret +; +; RV64-LABEL: select_v16f16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, 
sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: addi s0, sp, 64 +; RV64-NEXT: .cfi_def_cfa s0, 0 +; RV64-NEXT: andi sp, sp, -32 +; RV64-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft1, v10 +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: bnez a0, .LBB6_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: fmv.h ft0, ft1 +; RV64-NEXT: .LBB6_2: +; RV64-NEXT: fsh ft0, 0(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 15 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 15 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB6_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB6_4: +; RV64-NEXT: fsh ft1, 30(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 14 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 14 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB6_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB6_6: +; RV64-NEXT: fsh ft1, 28(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 13 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 13 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB6_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB6_8: +; RV64-NEXT: fsh ft1, 26(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 12 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 12 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB6_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB6_10: +; RV64-NEXT: fsh ft1, 24(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 11 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 11 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB6_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB6_12: +; RV64-NEXT: fsh ft1, 22(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 10 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 10 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB6_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB6_14: +; RV64-NEXT: fsh ft1, 20(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 9 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 9 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB6_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB6_16: +; RV64-NEXT: fsh ft1, 18(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 8 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 8 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB6_18 +; RV64-NEXT: # %bb.17: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB6_18: +; RV64-NEXT: fsh ft1, 16(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 7 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 7 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB6_20 +; RV64-NEXT: # %bb.19: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB6_20: +; RV64-NEXT: fsh ft1, 14(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, 
v10, 6 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 6 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB6_22 +; RV64-NEXT: # %bb.21: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB6_22: +; RV64-NEXT: fsh ft1, 12(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 5 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 5 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB6_24 +; RV64-NEXT: # %bb.23: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB6_24: +; RV64-NEXT: fsh ft1, 10(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 4 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 4 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB6_26 +; RV64-NEXT: # %bb.25: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB6_26: +; RV64-NEXT: fsh ft1, 8(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 3 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 3 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB6_28 +; RV64-NEXT: # %bb.27: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB6_28: +; RV64-NEXT: fsh ft1, 6(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 2 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 2 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB6_30 +; RV64-NEXT: # %bb.29: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB6_30: +; RV64-NEXT: fsh ft1, 4(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 1 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB6_32 +; RV64-NEXT: # %bb.31: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB6_32: +; RV64-NEXT: fsh ft1, 2(sp) +; RV64-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; RV64-NEXT: vle16.v v8, (sp) +; RV64-NEXT: addi sp, s0, -64 +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret + %v = select i1 %c, <16 x half> %a, <16 x half> %b + ret <16 x half> %v +} + +define <16 x half> @selectcc_v16f16(half %a, half %b, <16 x half> %c, <16 x half> %d) { +; RV32-LABEL: selectcc_v16f16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -64 +; RV32-NEXT: .cfi_def_cfa_offset 64 +; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: addi s0, sp, 64 +; RV32-NEXT: .cfi_def_cfa s0, 0 +; RV32-NEXT: andi sp, sp, -32 +; RV32-NEXT: feq.h a0, fa0, fa1 +; RV32-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft1, v10 +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: bnez a0, .LBB7_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: fmv.h ft0, ft1 +; RV32-NEXT: .LBB7_2: +; RV32-NEXT: fsh ft0, 0(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 15 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 15 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB7_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB7_4: +; RV32-NEXT: fsh ft1, 30(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 14 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 14 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB7_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: fmv.h ft1, ft0 +; 
RV32-NEXT: .LBB7_6: +; RV32-NEXT: fsh ft1, 28(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 13 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 13 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB7_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB7_8: +; RV32-NEXT: fsh ft1, 26(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 12 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 12 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB7_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB7_10: +; RV32-NEXT: fsh ft1, 24(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 11 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 11 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB7_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB7_12: +; RV32-NEXT: fsh ft1, 22(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 10 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 10 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB7_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB7_14: +; RV32-NEXT: fsh ft1, 20(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 9 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 9 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB7_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB7_16: +; RV32-NEXT: fsh ft1, 18(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 8 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 8 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB7_18 +; RV32-NEXT: # %bb.17: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB7_18: +; RV32-NEXT: fsh ft1, 16(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 7 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 7 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB7_20 +; RV32-NEXT: # %bb.19: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB7_20: +; RV32-NEXT: fsh ft1, 14(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 6 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 6 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB7_22 +; RV32-NEXT: # %bb.21: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB7_22: +; RV32-NEXT: fsh ft1, 12(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 5 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 5 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB7_24 +; RV32-NEXT: # %bb.23: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB7_24: +; RV32-NEXT: fsh ft1, 10(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 4 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 4 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB7_26 +; RV32-NEXT: # %bb.25: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB7_26: +; RV32-NEXT: fsh ft1, 8(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 3 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 3 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB7_28 +; RV32-NEXT: # %bb.27: +; 
RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB7_28: +; RV32-NEXT: fsh ft1, 6(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 2 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 2 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB7_30 +; RV32-NEXT: # %bb.29: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB7_30: +; RV32-NEXT: fsh ft1, 4(sp) +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 1 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB7_32 +; RV32-NEXT: # %bb.31: +; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: .LBB7_32: +; RV32-NEXT: fsh ft1, 2(sp) +; RV32-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; RV32-NEXT: vle16.v v8, (sp) +; RV32-NEXT: addi sp, s0, -64 +; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 64 +; RV32-NEXT: ret +; +; RV64-LABEL: selectcc_v16f16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: addi s0, sp, 64 +; RV64-NEXT: .cfi_def_cfa s0, 0 +; RV64-NEXT: andi sp, sp, -32 +; RV64-NEXT: feq.h a0, fa0, fa1 +; RV64-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft1, v10 +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: bnez a0, .LBB7_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: fmv.h ft0, ft1 +; RV64-NEXT: .LBB7_2: +; RV64-NEXT: fsh ft0, 0(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 15 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 15 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB7_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB7_4: +; RV64-NEXT: fsh ft1, 30(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 14 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 14 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB7_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB7_6: +; RV64-NEXT: fsh ft1, 28(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 13 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 13 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB7_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB7_8: +; RV64-NEXT: fsh ft1, 26(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 12 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 12 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB7_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB7_10: +; RV64-NEXT: fsh ft1, 24(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 11 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 11 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB7_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB7_12: +; RV64-NEXT: fsh ft1, 22(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 10 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 10 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB7_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: 
.LBB7_14: +; RV64-NEXT: fsh ft1, 20(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 9 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 9 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB7_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB7_16: +; RV64-NEXT: fsh ft1, 18(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 8 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 8 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB7_18 +; RV64-NEXT: # %bb.17: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB7_18: +; RV64-NEXT: fsh ft1, 16(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 7 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 7 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB7_20 +; RV64-NEXT: # %bb.19: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB7_20: +; RV64-NEXT: fsh ft1, 14(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 6 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 6 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB7_22 +; RV64-NEXT: # %bb.21: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB7_22: +; RV64-NEXT: fsh ft1, 12(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 5 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 5 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB7_24 +; RV64-NEXT: # %bb.23: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB7_24: +; RV64-NEXT: fsh ft1, 10(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 4 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 4 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB7_26 +; RV64-NEXT: # %bb.25: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB7_26: +; RV64-NEXT: fsh ft1, 8(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 3 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 3 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB7_28 +; RV64-NEXT: # %bb.27: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB7_28: +; RV64-NEXT: fsh ft1, 6(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 2 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 2 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB7_30 +; RV64-NEXT: # %bb.29: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB7_30: +; RV64-NEXT: fsh ft1, 4(sp) +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 1 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB7_32 +; RV64-NEXT: # %bb.31: +; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: .LBB7_32: +; RV64-NEXT: fsh ft1, 2(sp) +; RV64-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; RV64-NEXT: vle16.v v8, (sp) +; RV64-NEXT: addi sp, s0, -64 +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret + %cmp = fcmp oeq half %a, %b + %v = select i1 %cmp, <16 x half> %c, <16 x half> %d + ret <16 x half> %v +} + +define <2 x float> @select_v2f32(i1 zeroext %c, <2 x float> %a, <2 x float> %b) { +; CHECK-LABEL: select_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft1, v9 +; CHECK-NEXT: 
vfmv.f.s ft0, v8 +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vfmv.f.s ft3, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: vfmv.f.s ft2, v25 +; CHECK-NEXT: bnez a0, .LBB8_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fmv.s ft0, ft1 +; CHECK-NEXT: fmv.s ft2, ft3 +; CHECK-NEXT: .LBB8_2: +; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v8, ft2 +; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: ret + %v = select i1 %c, <2 x float> %a, <2 x float> %b + ret <2 x float> %v +} + +define <2 x float> @selectcc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d) { +; CHECK-LABEL: selectcc_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.s a0, fa0, fa1 +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: bnez a0, .LBB9_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fmv.s ft0, ft1 +; CHECK-NEXT: .LBB9_2: +; CHECK-NEXT: vsetivli a1, 2, e32,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, ft0 +; CHECK-NEXT: vfmv.f.s ft1, v9 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: bnez a0, .LBB9_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: fmv.s ft0, ft1 +; CHECK-NEXT: .LBB9_4: +; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; CHECK-NEXT: vfmv.s.f v25, ft0 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret + %cmp = fcmp oeq float %a, %b + %v = select i1 %cmp, <2 x float> %c, <2 x float> %d + ret <2 x float> %v +} + +define <4 x float> @select_v4f32(i1 zeroext %c, <4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: select_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft1, v9 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: bnez a0, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fmv.s ft0, ft1 +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: fsw ft0, 0(sp) +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 3 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 3 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB10_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: fmv.s ft1, ft0 +; CHECK-NEXT: .LBB10_4: +; CHECK-NEXT: fsw ft1, 12(sp) +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 2 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB10_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: fmv.s ft1, ft0 +; CHECK-NEXT: .LBB10_6: +; CHECK-NEXT: fsw ft1, 8(sp) +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB10_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: fmv.s ft1, ft0 +; CHECK-NEXT: .LBB10_8: +; CHECK-NEXT: fsw ft1, 4(sp) +; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v8, (sp) +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = select i1 %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %v +} + +define <4 x float> @selectcc_v4f32(float %a, float %b, <4 x float> %c, <4 x float> %d) { +; CHECK-LABEL: selectcc_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: feq.s a0, fa0, fa1 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft1, v9 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: bnez a0, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; 
CHECK-NEXT: fmv.s ft0, ft1 +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: fsw ft0, 0(sp) +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 3 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 3 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB11_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: fmv.s ft1, ft0 +; CHECK-NEXT: .LBB11_4: +; CHECK-NEXT: fsw ft1, 12(sp) +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 2 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB11_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: fmv.s ft1, ft0 +; CHECK-NEXT: .LBB11_6: +; CHECK-NEXT: fsw ft1, 8(sp) +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: bnez a0, .LBB11_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: fmv.s ft1, ft0 +; CHECK-NEXT: .LBB11_8: +; CHECK-NEXT: fsw ft1, 4(sp) +; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v8, (sp) +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %cmp = fcmp oeq float %a, %b + %v = select i1 %cmp, <4 x float> %c, <4 x float> %d + ret <4 x float> %v +} + +define <8 x float> @select_v8f32(i1 zeroext %c, <8 x float> %a, <8 x float> %b) { +; RV32-LABEL: select_v8f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -64 +; RV32-NEXT: .cfi_def_cfa_offset 64 +; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: addi s0, sp, 64 +; RV32-NEXT: .cfi_def_cfa s0, 0 +; RV32-NEXT: andi sp, sp, -32 +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft1, v10 +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: bnez a0, .LBB12_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: fmv.s ft0, ft1 +; RV32-NEXT: .LBB12_2: +; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 7 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 7 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB12_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB12_4: +; RV32-NEXT: fsw ft1, 28(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 6 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 6 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB12_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB12_6: +; RV32-NEXT: fsw ft1, 24(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 5 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 5 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB12_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB12_8: +; RV32-NEXT: fsw ft1, 20(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 4 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 4 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB12_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB12_10: +; RV32-NEXT: fsw ft1, 16(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 3 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 3 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB12_12 +; RV32-NEXT: # 
%bb.11: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB12_12: +; RV32-NEXT: fsw ft1, 12(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 2 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 2 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB12_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB12_14: +; RV32-NEXT: fsw ft1, 8(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 1 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB12_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB12_16: +; RV32-NEXT: fsw ft1, 4(sp) +; RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; RV32-NEXT: vle32.v v8, (sp) +; RV32-NEXT: addi sp, s0, -64 +; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 64 +; RV32-NEXT: ret +; +; RV64-LABEL: select_v8f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: addi s0, sp, 64 +; RV64-NEXT: .cfi_def_cfa s0, 0 +; RV64-NEXT: andi sp, sp, -32 +; RV64-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft1, v10 +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: bnez a0, .LBB12_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: fmv.s ft0, ft1 +; RV64-NEXT: .LBB12_2: +; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 7 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 7 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB12_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB12_4: +; RV64-NEXT: fsw ft1, 28(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 6 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 6 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB12_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB12_6: +; RV64-NEXT: fsw ft1, 24(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 5 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 5 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB12_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB12_8: +; RV64-NEXT: fsw ft1, 20(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 4 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 4 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB12_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB12_10: +; RV64-NEXT: fsw ft1, 16(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 3 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 3 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB12_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB12_12: +; RV64-NEXT: fsw ft1, 12(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 2 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 2 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB12_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB12_14: +; 
RV64-NEXT: fsw ft1, 8(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 1 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB12_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB12_16: +; RV64-NEXT: fsw ft1, 4(sp) +; RV64-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; RV64-NEXT: vle32.v v8, (sp) +; RV64-NEXT: addi sp, s0, -64 +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret + %v = select i1 %c, <8 x float> %a, <8 x float> %b + ret <8 x float> %v +} + +define <8 x float> @selectcc_v8f32(float %a, float %b, <8 x float> %c, <8 x float> %d) { +; RV32-LABEL: selectcc_v8f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -64 +; RV32-NEXT: .cfi_def_cfa_offset 64 +; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: addi s0, sp, 64 +; RV32-NEXT: .cfi_def_cfa s0, 0 +; RV32-NEXT: andi sp, sp, -32 +; RV32-NEXT: feq.s a0, fa0, fa1 +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft1, v10 +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: bnez a0, .LBB13_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: fmv.s ft0, ft1 +; RV32-NEXT: .LBB13_2: +; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 7 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 7 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB13_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB13_4: +; RV32-NEXT: fsw ft1, 28(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 6 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 6 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB13_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB13_6: +; RV32-NEXT: fsw ft1, 24(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 5 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 5 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB13_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB13_8: +; RV32-NEXT: fsw ft1, 20(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 4 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 4 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB13_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB13_10: +; RV32-NEXT: fsw ft1, 16(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 3 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 3 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB13_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB13_12: +; RV32-NEXT: fsw ft1, 12(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 2 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 2 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB13_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB13_14: +; RV32-NEXT: fsw ft1, 8(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 1 +; RV32-NEXT: 
vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB13_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB13_16: +; RV32-NEXT: fsw ft1, 4(sp) +; RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; RV32-NEXT: vle32.v v8, (sp) +; RV32-NEXT: addi sp, s0, -64 +; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 64 +; RV32-NEXT: ret +; +; RV64-LABEL: selectcc_v8f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: addi s0, sp, 64 +; RV64-NEXT: .cfi_def_cfa s0, 0 +; RV64-NEXT: andi sp, sp, -32 +; RV64-NEXT: feq.s a0, fa0, fa1 +; RV64-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft1, v10 +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: bnez a0, .LBB13_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: fmv.s ft0, ft1 +; RV64-NEXT: .LBB13_2: +; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 7 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 7 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB13_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB13_4: +; RV64-NEXT: fsw ft1, 28(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 6 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 6 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB13_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB13_6: +; RV64-NEXT: fsw ft1, 24(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 5 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 5 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB13_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB13_8: +; RV64-NEXT: fsw ft1, 20(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 4 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 4 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB13_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB13_10: +; RV64-NEXT: fsw ft1, 16(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 3 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 3 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB13_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB13_12: +; RV64-NEXT: fsw ft1, 12(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 2 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 2 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB13_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB13_14: +; RV64-NEXT: fsw ft1, 8(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 1 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB13_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB13_16: +; RV64-NEXT: fsw ft1, 4(sp) +; RV64-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; RV64-NEXT: vle32.v v8, (sp) +; RV64-NEXT: addi sp, s0, -64 +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded 
Reload +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret + %cmp = fcmp oeq float %a, %b + %v = select i1 %cmp, <8 x float> %c, <8 x float> %d + ret <8 x float> %v +} + +define <16 x float> @select_v16f32(i1 zeroext %c, <16 x float> %a, <16 x float> %b) { +; RV32-LABEL: select_v16f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -128 +; RV32-NEXT: .cfi_def_cfa_offset 128 +; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: addi s0, sp, 128 +; RV32-NEXT: .cfi_def_cfa s0, 0 +; RV32-NEXT: andi sp, sp, -64 +; RV32-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft1, v12 +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: bnez a0, .LBB14_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: fmv.s ft0, ft1 +; RV32-NEXT: .LBB14_2: +; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 15 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 15 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB14_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB14_4: +; RV32-NEXT: fsw ft1, 60(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 14 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 14 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB14_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB14_6: +; RV32-NEXT: fsw ft1, 56(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 13 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 13 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB14_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB14_8: +; RV32-NEXT: fsw ft1, 52(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 12 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 12 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB14_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB14_10: +; RV32-NEXT: fsw ft1, 48(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 11 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 11 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB14_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB14_12: +; RV32-NEXT: fsw ft1, 44(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 10 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 10 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB14_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB14_14: +; RV32-NEXT: fsw ft1, 40(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 9 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 9 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB14_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB14_16: +; RV32-NEXT: fsw ft1, 36(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 8 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 8 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB14_18 +; RV32-NEXT: # %bb.17: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB14_18: +; RV32-NEXT: fsw ft1, 32(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; 
RV32-NEXT: vslidedown.vi v28, v12, 7 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 7 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB14_20 +; RV32-NEXT: # %bb.19: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB14_20: +; RV32-NEXT: fsw ft1, 28(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 6 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 6 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB14_22 +; RV32-NEXT: # %bb.21: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB14_22: +; RV32-NEXT: fsw ft1, 24(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 5 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 5 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB14_24 +; RV32-NEXT: # %bb.23: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB14_24: +; RV32-NEXT: fsw ft1, 20(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 4 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 4 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB14_26 +; RV32-NEXT: # %bb.25: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB14_26: +; RV32-NEXT: fsw ft1, 16(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 3 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 3 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB14_28 +; RV32-NEXT: # %bb.27: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB14_28: +; RV32-NEXT: fsw ft1, 12(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 2 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 2 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB14_30 +; RV32-NEXT: # %bb.29: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB14_30: +; RV32-NEXT: fsw ft1, 8(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 1 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB14_32 +; RV32-NEXT: # %bb.31: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB14_32: +; RV32-NEXT: fsw ft1, 4(sp) +; RV32-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; RV32-NEXT: vle32.v v8, (sp) +; RV32-NEXT: addi sp, s0, -128 +; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 128 +; RV32-NEXT: ret +; +; RV64-LABEL: select_v16f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -128 +; RV64-NEXT: .cfi_def_cfa_offset 128 +; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: addi s0, sp, 128 +; RV64-NEXT: .cfi_def_cfa s0, 0 +; RV64-NEXT: andi sp, sp, -64 +; RV64-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft1, v12 +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: bnez a0, .LBB14_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: fmv.s ft0, ft1 +; RV64-NEXT: .LBB14_2: +; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 15 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 15 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB14_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB14_4: +; RV64-NEXT: fsw ft1, 60(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 14 +; 
RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 14 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB14_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB14_6: +; RV64-NEXT: fsw ft1, 56(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 13 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 13 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB14_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB14_8: +; RV64-NEXT: fsw ft1, 52(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 12 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 12 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB14_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB14_10: +; RV64-NEXT: fsw ft1, 48(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 11 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 11 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB14_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB14_12: +; RV64-NEXT: fsw ft1, 44(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 10 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 10 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB14_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB14_14: +; RV64-NEXT: fsw ft1, 40(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 9 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 9 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB14_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB14_16: +; RV64-NEXT: fsw ft1, 36(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 8 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 8 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB14_18 +; RV64-NEXT: # %bb.17: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB14_18: +; RV64-NEXT: fsw ft1, 32(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 7 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 7 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB14_20 +; RV64-NEXT: # %bb.19: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB14_20: +; RV64-NEXT: fsw ft1, 28(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 6 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 6 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB14_22 +; RV64-NEXT: # %bb.21: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB14_22: +; RV64-NEXT: fsw ft1, 24(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 5 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 5 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB14_24 +; RV64-NEXT: # %bb.23: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB14_24: +; RV64-NEXT: fsw ft1, 20(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 4 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 4 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB14_26 +; RV64-NEXT: # %bb.25: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB14_26: +; RV64-NEXT: fsw ft1, 16(sp) +; RV64-NEXT: vsetivli a1, 1, 
e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 3 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 3 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB14_28 +; RV64-NEXT: # %bb.27: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB14_28: +; RV64-NEXT: fsw ft1, 12(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 2 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 2 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB14_30 +; RV64-NEXT: # %bb.29: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB14_30: +; RV64-NEXT: fsw ft1, 8(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 1 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB14_32 +; RV64-NEXT: # %bb.31: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB14_32: +; RV64-NEXT: fsw ft1, 4(sp) +; RV64-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; RV64-NEXT: vle32.v v8, (sp) +; RV64-NEXT: addi sp, s0, -128 +; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 128 +; RV64-NEXT: ret + %v = select i1 %c, <16 x float> %a, <16 x float> %b + ret <16 x float> %v +} + +define <16 x float> @selectcc_v16f32(float %a, float %b, <16 x float> %c, <16 x float> %d) { +; RV32-LABEL: selectcc_v16f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -128 +; RV32-NEXT: .cfi_def_cfa_offset 128 +; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: addi s0, sp, 128 +; RV32-NEXT: .cfi_def_cfa s0, 0 +; RV32-NEXT: andi sp, sp, -64 +; RV32-NEXT: feq.s a0, fa0, fa1 +; RV32-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft1, v12 +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: bnez a0, .LBB15_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: fmv.s ft0, ft1 +; RV32-NEXT: .LBB15_2: +; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 15 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 15 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB15_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB15_4: +; RV32-NEXT: fsw ft1, 60(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 14 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 14 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB15_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB15_6: +; RV32-NEXT: fsw ft1, 56(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 13 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 13 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB15_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB15_8: +; RV32-NEXT: fsw ft1, 52(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 12 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 12 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB15_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB15_10: +; RV32-NEXT: fsw ft1, 48(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 11 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 11 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: 
bnez a0, .LBB15_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB15_12: +; RV32-NEXT: fsw ft1, 44(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 10 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 10 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB15_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB15_14: +; RV32-NEXT: fsw ft1, 40(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 9 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 9 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB15_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB15_16: +; RV32-NEXT: fsw ft1, 36(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 8 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 8 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB15_18 +; RV32-NEXT: # %bb.17: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB15_18: +; RV32-NEXT: fsw ft1, 32(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 7 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 7 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB15_20 +; RV32-NEXT: # %bb.19: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB15_20: +; RV32-NEXT: fsw ft1, 28(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 6 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 6 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB15_22 +; RV32-NEXT: # %bb.21: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB15_22: +; RV32-NEXT: fsw ft1, 24(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 5 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 5 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB15_24 +; RV32-NEXT: # %bb.23: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB15_24: +; RV32-NEXT: fsw ft1, 20(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 4 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 4 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB15_26 +; RV32-NEXT: # %bb.25: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB15_26: +; RV32-NEXT: fsw ft1, 16(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 3 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 3 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB15_28 +; RV32-NEXT: # %bb.27: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB15_28: +; RV32-NEXT: fsw ft1, 12(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 2 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 2 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB15_30 +; RV32-NEXT: # %bb.29: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB15_30: +; RV32-NEXT: fsw ft1, 8(sp) +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 1 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB15_32 +; RV32-NEXT: # %bb.31: +; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: .LBB15_32: +; RV32-NEXT: fsw ft1, 4(sp) +; RV32-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; RV32-NEXT: vle32.v v8, (sp) +; RV32-NEXT: addi sp, s0, -128 +; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded 
Reload +; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 128 +; RV32-NEXT: ret +; +; RV64-LABEL: selectcc_v16f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -128 +; RV64-NEXT: .cfi_def_cfa_offset 128 +; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: addi s0, sp, 128 +; RV64-NEXT: .cfi_def_cfa s0, 0 +; RV64-NEXT: andi sp, sp, -64 +; RV64-NEXT: feq.s a0, fa0, fa1 +; RV64-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft1, v12 +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: bnez a0, .LBB15_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: fmv.s ft0, ft1 +; RV64-NEXT: .LBB15_2: +; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 15 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 15 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB15_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB15_4: +; RV64-NEXT: fsw ft1, 60(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 14 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 14 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB15_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB15_6: +; RV64-NEXT: fsw ft1, 56(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 13 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 13 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB15_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB15_8: +; RV64-NEXT: fsw ft1, 52(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 12 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 12 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB15_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB15_10: +; RV64-NEXT: fsw ft1, 48(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 11 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 11 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB15_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB15_12: +; RV64-NEXT: fsw ft1, 44(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 10 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 10 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB15_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB15_14: +; RV64-NEXT: fsw ft1, 40(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 9 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 9 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB15_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB15_16: +; RV64-NEXT: fsw ft1, 36(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 8 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 8 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB15_18 +; RV64-NEXT: # %bb.17: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB15_18: +; RV64-NEXT: fsw ft1, 32(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 7 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 7 +; 
RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB15_20 +; RV64-NEXT: # %bb.19: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB15_20: +; RV64-NEXT: fsw ft1, 28(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 6 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 6 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB15_22 +; RV64-NEXT: # %bb.21: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB15_22: +; RV64-NEXT: fsw ft1, 24(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 5 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 5 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB15_24 +; RV64-NEXT: # %bb.23: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB15_24: +; RV64-NEXT: fsw ft1, 20(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 4 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 4 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB15_26 +; RV64-NEXT: # %bb.25: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB15_26: +; RV64-NEXT: fsw ft1, 16(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 3 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 3 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB15_28 +; RV64-NEXT: # %bb.27: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB15_28: +; RV64-NEXT: fsw ft1, 12(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 2 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 2 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB15_30 +; RV64-NEXT: # %bb.29: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB15_30: +; RV64-NEXT: fsw ft1, 8(sp) +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 1 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB15_32 +; RV64-NEXT: # %bb.31: +; RV64-NEXT: fmv.s ft1, ft0 +; RV64-NEXT: .LBB15_32: +; RV64-NEXT: fsw ft1, 4(sp) +; RV64-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; RV64-NEXT: vle32.v v8, (sp) +; RV64-NEXT: addi sp, s0, -128 +; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 128 +; RV64-NEXT: ret + %cmp = fcmp oeq float %a, %b + %v = select i1 %cmp, <16 x float> %c, <16 x float> %d + ret <16 x float> %v +} + +define <2 x double> @select_v2f64(i1 zeroext %c, <2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: select_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft1, v9 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vfmv.f.s ft3, v25 +; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: vfmv.f.s ft2, v25 +; CHECK-NEXT: bnez a0, .LBB16_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fmv.d ft0, ft1 +; CHECK-NEXT: fmv.d ft2, ft3 +; CHECK-NEXT: .LBB16_2: +; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v8, ft2 +; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: ret + %v = select i1 %c, <2 x double> %a, <2 x double> %b + ret <2 x double> %v +} + +define <2 x double> @selectcc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d) { +; CHECK-LABEL: selectcc_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.d a0, fa0, fa1 +; CHECK-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vfmv.f.s ft1, v25 +; 
CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: bnez a0, .LBB17_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fmv.d ft0, ft1 +; CHECK-NEXT: .LBB17_2: +; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu +; CHECK-NEXT: vfmv.v.f v25, ft0 +; CHECK-NEXT: vfmv.f.s ft1, v9 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: bnez a0, .LBB17_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: fmv.d ft0, ft1 +; CHECK-NEXT: .LBB17_4: +; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; CHECK-NEXT: vfmv.s.f v25, ft0 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret + %cmp = fcmp oeq double %a, %b + %v = select i1 %cmp, <2 x double> %c, <2 x double> %d + ret <2 x double> %v +} + +define <4 x double> @select_v4f64(i1 zeroext %c, <4 x double> %a, <4 x double> %b) { +; RV32-LABEL: select_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -64 +; RV32-NEXT: .cfi_def_cfa_offset 64 +; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: addi s0, sp, 64 +; RV32-NEXT: .cfi_def_cfa s0, 0 +; RV32-NEXT: andi sp, sp, -32 +; RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft1, v10 +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: bnez a0, .LBB18_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: fmv.d ft0, ft1 +; RV32-NEXT: .LBB18_2: +; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 3 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 3 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB18_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB18_4: +; RV32-NEXT: fsd ft1, 24(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 2 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 2 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB18_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB18_6: +; RV32-NEXT: fsd ft1, 16(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 1 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB18_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB18_8: +; RV32-NEXT: fsd ft1, 8(sp) +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vle64.v v8, (sp) +; RV32-NEXT: addi sp, s0, -64 +; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 64 +; RV32-NEXT: ret +; +; RV64-LABEL: select_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: addi s0, sp, 64 +; RV64-NEXT: .cfi_def_cfa s0, 0 +; RV64-NEXT: andi sp, sp, -32 +; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft1, v10 +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: bnez a0, .LBB18_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: fmv.d ft0, ft1 +; RV64-NEXT: .LBB18_2: +; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 3 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 3 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB18_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB18_4: +; RV64-NEXT: fsd ft1, 
24(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 2 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 2 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB18_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB18_6: +; RV64-NEXT: fsd ft1, 16(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 1 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB18_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB18_8: +; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV64-NEXT: vle64.v v8, (sp) +; RV64-NEXT: addi sp, s0, -64 +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret + %v = select i1 %c, <4 x double> %a, <4 x double> %b + ret <4 x double> %v +} + +define <4 x double> @selectcc_v4f64(double %a, double %b, <4 x double> %c, <4 x double> %d) { +; RV32-LABEL: selectcc_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -64 +; RV32-NEXT: .cfi_def_cfa_offset 64 +; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: addi s0, sp, 64 +; RV32-NEXT: .cfi_def_cfa s0, 0 +; RV32-NEXT: andi sp, sp, -32 +; RV32-NEXT: feq.d a0, fa0, fa1 +; RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft1, v10 +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: bnez a0, .LBB19_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: fmv.d ft0, ft1 +; RV32-NEXT: .LBB19_2: +; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 3 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 3 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB19_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB19_4: +; RV32-NEXT: fsd ft1, 24(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 2 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 2 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB19_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB19_6: +; RV32-NEXT: fsd ft1, 16(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: vslidedown.vi v26, v8, 1 +; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: bnez a0, .LBB19_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB19_8: +; RV32-NEXT: fsd ft1, 8(sp) +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vle64.v v8, (sp) +; RV32-NEXT: addi sp, s0, -64 +; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 64 +; RV32-NEXT: ret +; +; RV64-LABEL: selectcc_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: addi s0, sp, 64 +; RV64-NEXT: .cfi_def_cfa s0, 0 +; RV64-NEXT: andi sp, sp, -32 +; RV64-NEXT: feq.d a0, fa0, fa1 +; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft1, v10 +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: bnez a0, .LBB19_2 +; RV64-NEXT: # %bb.1: +; 
RV64-NEXT: fmv.d ft0, ft1 +; RV64-NEXT: .LBB19_2: +; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 3 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 3 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB19_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB19_4: +; RV64-NEXT: fsd ft1, 24(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 2 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 2 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB19_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB19_6: +; RV64-NEXT: fsd ft1, 16(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: vslidedown.vi v26, v8, 1 +; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: bnez a0, .LBB19_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB19_8: +; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV64-NEXT: vle64.v v8, (sp) +; RV64-NEXT: addi sp, s0, -64 +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret + %cmp = fcmp oeq double %a, %b + %v = select i1 %cmp, <4 x double> %c, <4 x double> %d + ret <4 x double> %v +} + +define <8 x double> @select_v8f64(i1 zeroext %c, <8 x double> %a, <8 x double> %b) { +; RV32-LABEL: select_v8f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -128 +; RV32-NEXT: .cfi_def_cfa_offset 128 +; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: addi s0, sp, 128 +; RV32-NEXT: .cfi_def_cfa s0, 0 +; RV32-NEXT: andi sp, sp, -64 +; RV32-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft1, v12 +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: bnez a0, .LBB20_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: fmv.d ft0, ft1 +; RV32-NEXT: .LBB20_2: +; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 7 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 7 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB20_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB20_4: +; RV32-NEXT: fsd ft1, 56(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 6 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 6 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB20_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB20_6: +; RV32-NEXT: fsd ft1, 48(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 5 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 5 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB20_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB20_8: +; RV32-NEXT: fsd ft1, 40(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 4 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 4 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB20_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB20_10: +; RV32-NEXT: fsd ft1, 32(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 3 +; RV32-NEXT: vfmv.f.s ft0, v28 +; 
RV32-NEXT: vslidedown.vi v28, v8, 3 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB20_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB20_12: +; RV32-NEXT: fsd ft1, 24(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 2 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 2 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB20_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB20_14: +; RV32-NEXT: fsd ft1, 16(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 1 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB20_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB20_16: +; RV32-NEXT: fsd ft1, 8(sp) +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vle64.v v8, (sp) +; RV32-NEXT: addi sp, s0, -128 +; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 128 +; RV32-NEXT: ret +; +; RV64-LABEL: select_v8f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -128 +; RV64-NEXT: .cfi_def_cfa_offset 128 +; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: addi s0, sp, 128 +; RV64-NEXT: .cfi_def_cfa s0, 0 +; RV64-NEXT: andi sp, sp, -64 +; RV64-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft1, v12 +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: bnez a0, .LBB20_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: fmv.d ft0, ft1 +; RV64-NEXT: .LBB20_2: +; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 7 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 7 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB20_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB20_4: +; RV64-NEXT: fsd ft1, 56(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 6 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 6 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB20_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB20_6: +; RV64-NEXT: fsd ft1, 48(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 5 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 5 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB20_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB20_8: +; RV64-NEXT: fsd ft1, 40(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 4 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 4 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB20_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB20_10: +; RV64-NEXT: fsd ft1, 32(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 3 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 3 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB20_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB20_12: +; RV64-NEXT: fsd ft1, 24(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 2 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 2 +; RV64-NEXT: vfmv.f.s 
ft1, v28 +; RV64-NEXT: bnez a0, .LBB20_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB20_14: +; RV64-NEXT: fsd ft1, 16(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 1 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB20_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB20_16: +; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV64-NEXT: vle64.v v8, (sp) +; RV64-NEXT: addi sp, s0, -128 +; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 128 +; RV64-NEXT: ret + %v = select i1 %c, <8 x double> %a, <8 x double> %b + ret <8 x double> %v +} + +define <8 x double> @selectcc_v8f64(double %a, double %b, <8 x double> %c, <8 x double> %d) { +; RV32-LABEL: selectcc_v8f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -128 +; RV32-NEXT: .cfi_def_cfa_offset 128 +; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: addi s0, sp, 128 +; RV32-NEXT: .cfi_def_cfa s0, 0 +; RV32-NEXT: andi sp, sp, -64 +; RV32-NEXT: feq.d a0, fa0, fa1 +; RV32-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft1, v12 +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: bnez a0, .LBB21_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: fmv.d ft0, ft1 +; RV32-NEXT: .LBB21_2: +; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 7 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 7 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB21_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB21_4: +; RV32-NEXT: fsd ft1, 56(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 6 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 6 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB21_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB21_6: +; RV32-NEXT: fsd ft1, 48(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 5 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 5 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB21_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB21_8: +; RV32-NEXT: fsd ft1, 40(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 4 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 4 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB21_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB21_10: +; RV32-NEXT: fsd ft1, 32(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 3 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 3 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB21_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB21_12: +; RV32-NEXT: fsd ft1, 24(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 2 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 2 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB21_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB21_14: +; RV32-NEXT: fsd ft1, 16(sp) +; RV32-NEXT: vsetivli a1, 1, 
e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: vslidedown.vi v28, v8, 1 +; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: bnez a0, .LBB21_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB21_16: +; RV32-NEXT: fsd ft1, 8(sp) +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vle64.v v8, (sp) +; RV32-NEXT: addi sp, s0, -128 +; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 128 +; RV32-NEXT: ret +; +; RV64-LABEL: selectcc_v8f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -128 +; RV64-NEXT: .cfi_def_cfa_offset 128 +; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: addi s0, sp, 128 +; RV64-NEXT: .cfi_def_cfa s0, 0 +; RV64-NEXT: andi sp, sp, -64 +; RV64-NEXT: feq.d a0, fa0, fa1 +; RV64-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft1, v12 +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: bnez a0, .LBB21_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: fmv.d ft0, ft1 +; RV64-NEXT: .LBB21_2: +; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 7 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 7 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB21_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB21_4: +; RV64-NEXT: fsd ft1, 56(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 6 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 6 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB21_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB21_6: +; RV64-NEXT: fsd ft1, 48(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 5 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 5 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB21_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB21_8: +; RV64-NEXT: fsd ft1, 40(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 4 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 4 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB21_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB21_10: +; RV64-NEXT: fsd ft1, 32(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 3 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 3 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB21_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB21_12: +; RV64-NEXT: fsd ft1, 24(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 2 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 2 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB21_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB21_14: +; RV64-NEXT: fsd ft1, 16(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: vslidedown.vi v28, v8, 1 +; RV64-NEXT: vfmv.f.s ft1, v28 +; RV64-NEXT: bnez a0, .LBB21_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB21_16: +; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; 
RV64-NEXT: vle64.v v8, (sp) +; RV64-NEXT: addi sp, s0, -128 +; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 128 +; RV64-NEXT: ret + %cmp = fcmp oeq double %a, %b + %v = select i1 %cmp, <8 x double> %c, <8 x double> %d + ret <8 x double> %v +} + +define <16 x double> @select_v16f64(i1 zeroext %c, <16 x double> %a, <16 x double> %b) { +; RV32-LABEL: select_v16f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -256 +; RV32-NEXT: .cfi_def_cfa_offset 256 +; RV32-NEXT: sw ra, 252(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 248(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: addi s0, sp, 256 +; RV32-NEXT: .cfi_def_cfa s0, 0 +; RV32-NEXT: andi sp, sp, -128 +; RV32-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV32-NEXT: vfmv.f.s ft1, v16 +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: bnez a0, .LBB22_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: fmv.d ft0, ft1 +; RV32-NEXT: .LBB22_2: +; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 15 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 15 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB22_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB22_4: +; RV32-NEXT: fsd ft1, 120(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 14 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 14 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB22_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB22_6: +; RV32-NEXT: fsd ft1, 112(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 13 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 13 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB22_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB22_8: +; RV32-NEXT: fsd ft1, 104(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 12 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 12 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB22_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB22_10: +; RV32-NEXT: fsd ft1, 96(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 11 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 11 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB22_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB22_12: +; RV32-NEXT: fsd ft1, 88(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 10 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 10 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB22_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB22_14: +; RV32-NEXT: fsd ft1, 80(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 9 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 9 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB22_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB22_16: +; RV32-NEXT: fsd ft1, 72(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 8 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 8 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, 
.LBB22_18 +; RV32-NEXT: # %bb.17: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB22_18: +; RV32-NEXT: fsd ft1, 64(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 7 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 7 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB22_20 +; RV32-NEXT: # %bb.19: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB22_20: +; RV32-NEXT: fsd ft1, 56(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 6 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 6 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB22_22 +; RV32-NEXT: # %bb.21: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB22_22: +; RV32-NEXT: fsd ft1, 48(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 5 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 5 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB22_24 +; RV32-NEXT: # %bb.23: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB22_24: +; RV32-NEXT: fsd ft1, 40(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 4 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 4 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB22_26 +; RV32-NEXT: # %bb.25: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB22_26: +; RV32-NEXT: fsd ft1, 32(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 3 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB22_28 +; RV32-NEXT: # %bb.27: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB22_28: +; RV32-NEXT: fsd ft1, 24(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 2 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 2 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB22_30 +; RV32-NEXT: # %bb.29: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB22_30: +; RV32-NEXT: fsd ft1, 16(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v16, 1 +; RV32-NEXT: vfmv.f.s ft0, v16 +; RV32-NEXT: vslidedown.vi v8, v8, 1 +; RV32-NEXT: vfmv.f.s ft1, v8 +; RV32-NEXT: bnez a0, .LBB22_32 +; RV32-NEXT: # %bb.31: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB22_32: +; RV32-NEXT: fsd ft1, 8(sp) +; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV32-NEXT: vle64.v v8, (sp) +; RV32-NEXT: addi sp, s0, -256 +; RV32-NEXT: lw s0, 248(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 252(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 256 +; RV32-NEXT: ret +; +; RV64-LABEL: select_v16f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -256 +; RV64-NEXT: .cfi_def_cfa_offset 256 +; RV64-NEXT: sd ra, 248(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: addi s0, sp, 256 +; RV64-NEXT: .cfi_def_cfa s0, 0 +; RV64-NEXT: andi sp, sp, -128 +; RV64-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV64-NEXT: vfmv.f.s ft1, v16 +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: bnez a0, .LBB22_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: fmv.d ft0, ft1 +; RV64-NEXT: .LBB22_2: +; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 15 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 15 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB22_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: 
fmv.d ft1, ft0 +; RV64-NEXT: .LBB22_4: +; RV64-NEXT: fsd ft1, 120(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 14 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 14 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB22_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB22_6: +; RV64-NEXT: fsd ft1, 112(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 13 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 13 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB22_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB22_8: +; RV64-NEXT: fsd ft1, 104(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 12 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 12 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB22_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB22_10: +; RV64-NEXT: fsd ft1, 96(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 11 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 11 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB22_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB22_12: +; RV64-NEXT: fsd ft1, 88(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 10 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 10 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB22_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB22_14: +; RV64-NEXT: fsd ft1, 80(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 9 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 9 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB22_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB22_16: +; RV64-NEXT: fsd ft1, 72(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 8 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 8 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB22_18 +; RV64-NEXT: # %bb.17: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB22_18: +; RV64-NEXT: fsd ft1, 64(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 7 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 7 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB22_20 +; RV64-NEXT: # %bb.19: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB22_20: +; RV64-NEXT: fsd ft1, 56(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 6 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 6 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB22_22 +; RV64-NEXT: # %bb.21: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB22_22: +; RV64-NEXT: fsd ft1, 48(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 5 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 5 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB22_24 +; RV64-NEXT: # %bb.23: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB22_24: +; RV64-NEXT: fsd ft1, 40(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 4 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 4 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, 
.LBB22_26 +; RV64-NEXT: # %bb.25: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB22_26: +; RV64-NEXT: fsd ft1, 32(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 3 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 3 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB22_28 +; RV64-NEXT: # %bb.27: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB22_28: +; RV64-NEXT: fsd ft1, 24(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 2 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 2 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB22_30 +; RV64-NEXT: # %bb.29: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB22_30: +; RV64-NEXT: fsd ft1, 16(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v16, v16, 1 +; RV64-NEXT: vfmv.f.s ft0, v16 +; RV64-NEXT: vslidedown.vi v8, v8, 1 +; RV64-NEXT: vfmv.f.s ft1, v8 +; RV64-NEXT: bnez a0, .LBB22_32 +; RV64-NEXT: # %bb.31: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB22_32: +; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (sp) +; RV64-NEXT: addi sp, s0, -256 +; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 248(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 256 +; RV64-NEXT: ret + %v = select i1 %c, <16 x double> %a, <16 x double> %b + ret <16 x double> %v +} + +define <16 x double> @selectcc_v16f64(double %a, double %b, <16 x double> %c, <16 x double> %d) { +; RV32-LABEL: selectcc_v16f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -256 +; RV32-NEXT: .cfi_def_cfa_offset 256 +; RV32-NEXT: sw ra, 252(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 248(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: addi s0, sp, 256 +; RV32-NEXT: .cfi_def_cfa s0, 0 +; RV32-NEXT: andi sp, sp, -128 +; RV32-NEXT: feq.d a0, fa0, fa1 +; RV32-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV32-NEXT: vfmv.f.s ft1, v16 +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: bnez a0, .LBB23_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: fmv.d ft0, ft1 +; RV32-NEXT: .LBB23_2: +; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 15 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 15 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB23_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB23_4: +; RV32-NEXT: fsd ft1, 120(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 14 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 14 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB23_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB23_6: +; RV32-NEXT: fsd ft1, 112(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 13 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 13 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB23_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB23_8: +; RV32-NEXT: fsd ft1, 104(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 12 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 12 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB23_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB23_10: +; RV32-NEXT: fsd ft1, 96(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu 
+; RV32-NEXT: vslidedown.vi v24, v16, 11 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 11 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB23_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB23_12: +; RV32-NEXT: fsd ft1, 88(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 10 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 10 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB23_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB23_14: +; RV32-NEXT: fsd ft1, 80(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 9 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 9 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB23_16 +; RV32-NEXT: # %bb.15: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB23_16: +; RV32-NEXT: fsd ft1, 72(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 8 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 8 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB23_18 +; RV32-NEXT: # %bb.17: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB23_18: +; RV32-NEXT: fsd ft1, 64(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 7 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 7 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB23_20 +; RV32-NEXT: # %bb.19: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB23_20: +; RV32-NEXT: fsd ft1, 56(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 6 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 6 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB23_22 +; RV32-NEXT: # %bb.21: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB23_22: +; RV32-NEXT: fsd ft1, 48(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 5 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 5 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB23_24 +; RV32-NEXT: # %bb.23: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB23_24: +; RV32-NEXT: fsd ft1, 40(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 4 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 4 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB23_26 +; RV32-NEXT: # %bb.25: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB23_26: +; RV32-NEXT: fsd ft1, 32(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 3 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB23_28 +; RV32-NEXT: # %bb.27: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB23_28: +; RV32-NEXT: fsd ft1, 24(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 2 +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: vslidedown.vi v24, v8, 2 +; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: bnez a0, .LBB23_30 +; RV32-NEXT: # %bb.29: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB23_30: +; RV32-NEXT: fsd ft1, 16(sp) +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v16, v16, 1 +; RV32-NEXT: vfmv.f.s ft0, v16 +; RV32-NEXT: vslidedown.vi v8, v8, 1 +; RV32-NEXT: vfmv.f.s ft1, v8 +; RV32-NEXT: bnez a0, .LBB23_32 +; RV32-NEXT: # %bb.31: +; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: .LBB23_32: +; RV32-NEXT: fsd ft1, 8(sp) 
+; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV32-NEXT: vle64.v v8, (sp) +; RV32-NEXT: addi sp, s0, -256 +; RV32-NEXT: lw s0, 248(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 252(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 256 +; RV32-NEXT: ret +; +; RV64-LABEL: selectcc_v16f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -256 +; RV64-NEXT: .cfi_def_cfa_offset 256 +; RV64-NEXT: sd ra, 248(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: addi s0, sp, 256 +; RV64-NEXT: .cfi_def_cfa s0, 0 +; RV64-NEXT: andi sp, sp, -128 +; RV64-NEXT: feq.d a0, fa0, fa1 +; RV64-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV64-NEXT: vfmv.f.s ft1, v16 +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: bnez a0, .LBB23_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: fmv.d ft0, ft1 +; RV64-NEXT: .LBB23_2: +; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 15 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 15 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB23_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB23_4: +; RV64-NEXT: fsd ft1, 120(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 14 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 14 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB23_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB23_6: +; RV64-NEXT: fsd ft1, 112(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 13 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 13 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB23_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB23_8: +; RV64-NEXT: fsd ft1, 104(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 12 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 12 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB23_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB23_10: +; RV64-NEXT: fsd ft1, 96(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 11 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 11 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB23_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB23_12: +; RV64-NEXT: fsd ft1, 88(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 10 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 10 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB23_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB23_14: +; RV64-NEXT: fsd ft1, 80(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 9 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 9 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB23_16 +; RV64-NEXT: # %bb.15: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB23_16: +; RV64-NEXT: fsd ft1, 72(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 8 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 8 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB23_18 +; RV64-NEXT: # %bb.17: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB23_18: +; RV64-NEXT: fsd ft1, 64(sp) +; 
RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 7 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 7 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB23_20 +; RV64-NEXT: # %bb.19: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB23_20: +; RV64-NEXT: fsd ft1, 56(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 6 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 6 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB23_22 +; RV64-NEXT: # %bb.21: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB23_22: +; RV64-NEXT: fsd ft1, 48(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 5 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 5 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB23_24 +; RV64-NEXT: # %bb.23: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB23_24: +; RV64-NEXT: fsd ft1, 40(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 4 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 4 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB23_26 +; RV64-NEXT: # %bb.25: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB23_26: +; RV64-NEXT: fsd ft1, 32(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 3 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 3 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB23_28 +; RV64-NEXT: # %bb.27: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB23_28: +; RV64-NEXT: fsd ft1, 24(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v16, 2 +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: vslidedown.vi v24, v8, 2 +; RV64-NEXT: vfmv.f.s ft1, v24 +; RV64-NEXT: bnez a0, .LBB23_30 +; RV64-NEXT: # %bb.29: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB23_30: +; RV64-NEXT: fsd ft1, 16(sp) +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v16, v16, 1 +; RV64-NEXT: vfmv.f.s ft0, v16 +; RV64-NEXT: vslidedown.vi v8, v8, 1 +; RV64-NEXT: vfmv.f.s ft1, v8 +; RV64-NEXT: bnez a0, .LBB23_32 +; RV64-NEXT: # %bb.31: +; RV64-NEXT: fmv.d ft1, ft0 +; RV64-NEXT: .LBB23_32: +; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vle64.v v8, (sp) +; RV64-NEXT: addi sp, s0, -256 +; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 248(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 256 +; RV64-NEXT: ret + %cmp = fcmp oeq double %a, %b + %v = select i1 %cmp, <16 x double> %c, <16 x double> %d + ret <16 x double> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll @@ -0,0 +1,1000 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +define <1 x i1> @select_v1i1(i1 zeroext %c, <1 x i1> %a, <1 x i1> %b) { +; CHECK-LABEL: select_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: bnez a0, .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; 
CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vmandnot.mm v25, v26, v8 +; CHECK-NEXT: vmand.mm v26, v0, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 +; CHECK-NEXT: ret + %v = select i1 %c, <1 x i1> %a, <1 x i1> %b + ret <1 x i1> %v +} + +define <1 x i1> @selectcc_v1i1(i1 signext %a, i1 signext %b, <1 x i1> %c, <1 x i1> %d) { +; CHECK-LABEL: selectcc_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: andi a1, a0, 1 +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: bnez a1, .LBB1_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vmandnot.mm v25, v26, v8 +; CHECK-NEXT: vmand.mm v26, v0, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 +; CHECK-NEXT: ret + %cmp = icmp ne i1 %a, %b + %v = select i1 %cmp, <1 x i1> %c, <1 x i1> %d + ret <1 x i1> %v +} + +define <2 x i1> @select_v2i1(i1 zeroext %c, <2 x i1> %a, <2 x i1> %b) { +; CHECK-LABEL: select_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: bnez a0, .LBB2_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vmandnot.mm v25, v26, v8 +; CHECK-NEXT: vmand.mm v26, v0, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 +; CHECK-NEXT: ret + %v = select i1 %c, <2 x i1> %a, <2 x i1> %b + ret <2 x i1> %v +} + +define <2 x i1> @selectcc_v2i1(i1 signext %a, i1 signext %b, <2 x i1> %c, <2 x i1> %d) { +; CHECK-LABEL: selectcc_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: andi a1, a0, 1 +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: bnez a1, .LBB3_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vmandnot.mm v25, v26, v8 +; CHECK-NEXT: vmand.mm v26, v0, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 +; CHECK-NEXT: ret + %cmp = icmp ne i1 %a, %b + %v = select i1 %cmp, <2 x i1> %c, <2 x i1> %d + ret <2 x i1> %v +} + +define <4 x i1> @select_v4i1(i1 zeroext %c, <4 x i1> %a, <4 x i1> %b) { +; CHECK-LABEL: select_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: bnez a0, .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vmandnot.mm v25, v26, v8 +; CHECK-NEXT: vmand.mm v26, v0, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 +; CHECK-NEXT: ret + %v = select i1 %c, <4 x i1> %a, <4 x i1> %b + ret <4 x i1> %v +} + +define <4 x i1> @selectcc_v4i1(i1 signext %a, i1 signext %b, <4 x i1> %c, <4 x i1> %d) { +; CHECK-LABEL: selectcc_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: andi a1, a0, 1 +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: bnez a1, .LBB5_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vmandnot.mm v25, v26, v8 +; CHECK-NEXT: vmand.mm v26, v0, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 +; CHECK-NEXT: ret + %cmp = icmp ne i1 %a, %b + %v = select i1 %cmp, <4 x i1> %c, <4 x i1> %d + ret <4 x i1> %v +} + +define <8 x i1> @select_v8i1(i1 zeroext 
%c, <8 x i1> %a, <8 x i1> %b) { +; CHECK-LABEL: select_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: bnez a0, .LBB6_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vmandnot.mm v25, v26, v8 +; CHECK-NEXT: vmand.mm v26, v0, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 +; CHECK-NEXT: ret + %v = select i1 %c, <8 x i1> %a, <8 x i1> %b + ret <8 x i1> %v +} + +define <8 x i1> @selectcc_v8i1(i1 signext %a, i1 signext %b, <8 x i1> %c, <8 x i1> %d) { +; CHECK-LABEL: selectcc_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: andi a1, a0, 1 +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: bnez a1, .LBB7_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vmandnot.mm v25, v26, v8 +; CHECK-NEXT: vmand.mm v26, v0, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 +; CHECK-NEXT: ret + %cmp = icmp ne i1 %a, %b + %v = select i1 %cmp, <8 x i1> %c, <8 x i1> %d + ret <8 x i1> %v +} + +define <16 x i1> @select_v16i1(i1 zeroext %c, <16 x i1> %a, <16 x i1> %b) { +; CHECK-LABEL: select_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: bnez a0, .LBB8_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB8_2: +; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vmandnot.mm v25, v26, v8 +; CHECK-NEXT: vmand.mm v26, v0, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 +; CHECK-NEXT: ret + %v = select i1 %c, <16 x i1> %a, <16 x i1> %b + ret <16 x i1> %v +} + +define <16 x i1> @selectcc_v16i1(i1 signext %a, i1 signext %b, <16 x i1> %c, <16 x i1> %d) { +; CHECK-LABEL: selectcc_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: andi a1, a0, 1 +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: bnez a1, .LBB9_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB9_2: +; CHECK-NEXT: vsetivli a1, 16, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vmandnot.mm v25, v26, v8 +; CHECK-NEXT: vmand.mm v26, v0, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 +; CHECK-NEXT: ret + %cmp = icmp ne i1 %a, %b + %v = select i1 %cmp, <16 x i1> %c, <16 x i1> %d + ret <16 x i1> %v +} + +define <2 x i8> @select_v2i8(i1 zeroext %c, <2 x i8> %a, <2 x i8> %b) { +; CHECK-LABEL: select_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, <2 x i8> %a, <2 x i8> %b + ret <2 x i8> %v +} + +define <2 x i8> @selectcc_v2i8(i8 signext %a, i8 signext %b, <2 x i8> %c, <2 x i8> %d) { +; CHECK-LABEL: selectcc_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a2 +; CHECK-NEXT: vmv.v.x v26, a2 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; 
CHECK-NEXT: ret + %cmp = icmp ne i8 %a, %b + %v = select i1 %cmp, <2 x i8> %c, <2 x i8> %d + ret <2 x i8> %v +} + +define <4 x i8> @select_v4i8(i1 zeroext %c, <4 x i8> %a, <4 x i8> %b) { +; CHECK-LABEL: select_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB12_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB12_2: +; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, <4 x i8> %a, <4 x i8> %b + ret <4 x i8> %v +} + +define <4 x i8> @selectcc_v4i8(i8 signext %a, i8 signext %b, <4 x i8> %c, <4 x i8> %d) { +; CHECK-LABEL: selectcc_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB13_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB13_2: +; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a2 +; CHECK-NEXT: vmv.v.x v26, a2 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i8 %a, %b + %v = select i1 %cmp, <4 x i8> %c, <4 x i8> %d + ret <4 x i8> %v +} + +define <8 x i8> @select_v8i8(i1 zeroext %c, <8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: select_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB14_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB14_2: +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, <8 x i8> %a, <8 x i8> %b + ret <8 x i8> %v +} + +define <8 x i8> @selectcc_v8i8(i8 signext %a, i8 signext %b, <8 x i8> %c, <8 x i8> %d) { +; CHECK-LABEL: selectcc_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB15_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB15_2: +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a2 +; CHECK-NEXT: vmv.v.x v26, a2 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i8 %a, %b + %v = select i1 %cmp, <8 x i8> %c, <8 x i8> %d + ret <8 x i8> %v +} + +define <16 x i8> @select_v16i8(i1 zeroext %c, <16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: select_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB16_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB16_2: +; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %v +} + +define <16 x i8> @selectcc_v16i8(i8 signext %a, i8 signext %b, <16 x i8> %c, <16 x i8> %d) { +; CHECK-LABEL: selectcc_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB17_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB17_2: +; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a2 +; CHECK-NEXT: vmv.v.x v26, a2 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv 
v8, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i8 %a, %b + %v = select i1 %cmp, <16 x i8> %c, <16 x i8> %d + ret <16 x i8> %v +} + +define <2 x i16> @select_v2i16(i1 zeroext %c, <2 x i16> %a, <2 x i16> %b) { +; CHECK-LABEL: select_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB18_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB18_2: +; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, <2 x i16> %a, <2 x i16> %b + ret <2 x i16> %v +} + +define <2 x i16> @selectcc_v2i16(i16 signext %a, i16 signext %b, <2 x i16> %c, <2 x i16> %d) { +; CHECK-LABEL: selectcc_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB19_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB19_2: +; CHECK-NEXT: vsetivli a0, 2, e16,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a2 +; CHECK-NEXT: vmv.v.x v26, a2 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i16 %a, %b + %v = select i1 %cmp, <2 x i16> %c, <2 x i16> %d + ret <2 x i16> %v +} + +define <4 x i16> @select_v4i16(i1 zeroext %c, <4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: select_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB20_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB20_2: +; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, <4 x i16> %a, <4 x i16> %b + ret <4 x i16> %v +} + +define <4 x i16> @selectcc_v4i16(i16 signext %a, i16 signext %b, <4 x i16> %c, <4 x i16> %d) { +; CHECK-LABEL: selectcc_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB21_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB21_2: +; CHECK-NEXT: vsetivli a0, 4, e16,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a2 +; CHECK-NEXT: vmv.v.x v26, a2 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i16 %a, %b + %v = select i1 %cmp, <4 x i16> %c, <4 x i16> %d + ret <4 x i16> %v +} + +define <8 x i16> @select_v8i16(i1 zeroext %c, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: select_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB22_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB22_2: +; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %v +} + +define <8 x i16> @selectcc_v8i16(i16 signext %a, i16 signext %b, <8 x i16> %c, <8 x i16> %d) { +; CHECK-LABEL: selectcc_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB23_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB23_2: +; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a2 +; CHECK-NEXT: vmv.v.x v26, a2 +; CHECK-NEXT: vxor.vi v26, 
v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i16 %a, %b + %v = select i1 %cmp, <8 x i16> %c, <8 x i16> %d + ret <8 x i16> %v +} + +define <16 x i16> @select_v16i16(i1 zeroext %c, <16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: select_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB24_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB24_2: +; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; CHECK-NEXT: vand.vx v26, v8, a1 +; CHECK-NEXT: vmv.v.x v28, a1 +; CHECK-NEXT: vxor.vi v28, v28, -1 +; CHECK-NEXT: vand.vv v28, v10, v28 +; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: ret + %v = select i1 %c, <16 x i16> %a, <16 x i16> %b + ret <16 x i16> %v +} + +define <16 x i16> @selectcc_v16i16(i16 signext %a, i16 signext %b, <16 x i16> %c, <16 x i16> %d) { +; CHECK-LABEL: selectcc_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB25_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; CHECK-NEXT: vand.vx v26, v8, a2 +; CHECK-NEXT: vmv.v.x v28, a2 +; CHECK-NEXT: vxor.vi v28, v28, -1 +; CHECK-NEXT: vand.vv v28, v10, v28 +; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: ret + %cmp = icmp ne i16 %a, %b + %v = select i1 %cmp, <16 x i16> %c, <16 x i16> %d + ret <16 x i16> %v +} + +define <2 x i32> @select_v2i32(i1 zeroext %c, <2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: select_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB26_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, <2 x i32> %a, <2 x i32> %b + ret <2 x i32> %v +} + +define <2 x i32> @selectcc_v2i32(i32 signext %a, i32 signext %b, <2 x i32> %c, <2 x i32> %d) { +; CHECK-LABEL: selectcc_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB27_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: vsetivli a0, 2, e32,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a2 +; CHECK-NEXT: vmv.v.x v26, a2 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i32 %a, %b + %v = select i1 %cmp, <2 x i32> %c, <2 x i32> %d + ret <2 x i32> %v +} + +define <4 x i32> @select_v4i32(i1 zeroext %c, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: select_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB28_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB28_2: +; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %v +} + +define <4 x i32> @selectcc_v4i32(i32 signext %a, i32 signext %b, <4 x i32> %c, <4 x i32> %d) { +; CHECK-LABEL: selectcc_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB29_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB29_2: +; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; 
CHECK-NEXT: vand.vx v25, v8, a2 +; CHECK-NEXT: vmv.v.x v26, a2 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i32 %a, %b + %v = select i1 %cmp, <4 x i32> %c, <4 x i32> %d + ret <4 x i32> %v +} + +define <8 x i32> @select_v8i32(i1 zeroext %c, <8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: select_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB30_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB30_2: +; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; CHECK-NEXT: vand.vx v26, v8, a1 +; CHECK-NEXT: vmv.v.x v28, a1 +; CHECK-NEXT: vxor.vi v28, v28, -1 +; CHECK-NEXT: vand.vv v28, v10, v28 +; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: ret + %v = select i1 %c, <8 x i32> %a, <8 x i32> %b + ret <8 x i32> %v +} + +define <8 x i32> @selectcc_v8i32(i32 signext %a, i32 signext %b, <8 x i32> %c, <8 x i32> %d) { +; CHECK-LABEL: selectcc_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB31_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB31_2: +; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; CHECK-NEXT: vand.vx v26, v8, a2 +; CHECK-NEXT: vmv.v.x v28, a2 +; CHECK-NEXT: vxor.vi v28, v28, -1 +; CHECK-NEXT: vand.vv v28, v10, v28 +; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: ret + %cmp = icmp ne i32 %a, %b + %v = select i1 %cmp, <8 x i32> %c, <8 x i32> %d + ret <8 x i32> %v +} + +define <16 x i32> @select_v16i32(i1 zeroext %c, <16 x i32> %a, <16 x i32> %b) { +; CHECK-LABEL: select_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB32_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; CHECK-NEXT: vand.vx v28, v8, a1 +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vxor.vi v8, v8, -1 +; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: ret + %v = select i1 %c, <16 x i32> %a, <16 x i32> %b + ret <16 x i32> %v +} + +define <16 x i32> @selectcc_v16i32(i32 signext %a, i32 signext %b, <16 x i32> %c, <16 x i32> %d) { +; CHECK-LABEL: selectcc_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB33_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; CHECK-NEXT: vand.vx v28, v8, a2 +; CHECK-NEXT: vmv.v.x v8, a2 +; CHECK-NEXT: vxor.vi v8, v8, -1 +; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: ret + %cmp = icmp ne i32 %a, %b + %v = select i1 %cmp, <16 x i32> %c, <16 x i32> %d + ret <16 x i32> %v +} + +define <2 x i64> @select_v2i64(i1 zeroext %c, <2 x i64> %a, <2 x i64> %b) { +; RV32-LABEL: select_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: bnez a0, .LBB34_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB34_2: +; RV32-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; RV32-NEXT: vmv.v.x v25, a1 +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: vand.vv v26, v8, v25 +; RV32-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; RV32-NEXT: vmv.v.i v27, -1 +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: vxor.vv v25, v25, v27 +; RV32-NEXT: vand.vv v25, v9, v25 +; RV32-NEXT: vor.vv v8, v26, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: select_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: bnez a0, .LBB34_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB34_2: +; 
RV64-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV64-NEXT: vand.vx v25, v8, a1 +; RV64-NEXT: vmv.v.x v26, a1 +; RV64-NEXT: vxor.vi v26, v26, -1 +; RV64-NEXT: vand.vv v26, v9, v26 +; RV64-NEXT: vor.vv v8, v25, v26 +; RV64-NEXT: ret + %v = select i1 %c, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %v +} + +define <2 x i64> @selectcc_v2i64(i64 signext %a, i64 signext %b, <2 x i64> %c, <2 x i64> %d) { +; RV32-LABEL: selectcc_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: xor a1, a1, a3 +; RV32-NEXT: xor a0, a0, a2 +; RV32-NEXT: or a1, a0, a1 +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: bnez a1, .LBB35_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB35_2: +; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu +; RV32-NEXT: vmv.v.x v25, a0 +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: vand.vv v26, v8, v25 +; RV32-NEXT: vsetivli a0, 4, e32,m1,ta,mu +; RV32-NEXT: vmv.v.i v27, -1 +; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV32-NEXT: vxor.vv v25, v25, v27 +; RV32-NEXT: vand.vv v25, v9, v25 +; RV32-NEXT: vor.vv v8, v26, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: selectcc_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a0, a1, .LBB35_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB35_2: +; RV64-NEXT: vsetivli a0, 2, e64,m1,ta,mu +; RV64-NEXT: vand.vx v25, v8, a2 +; RV64-NEXT: vmv.v.x v26, a2 +; RV64-NEXT: vxor.vi v26, v26, -1 +; RV64-NEXT: vand.vv v26, v9, v26 +; RV64-NEXT: vor.vv v8, v25, v26 +; RV64-NEXT: ret + %cmp = icmp ne i64 %a, %b + %v = select i1 %cmp, <2 x i64> %c, <2 x i64> %d + ret <2 x i64> %v +} + +define <4 x i64> @select_v4i64(i1 zeroext %c, <4 x i64> %a, <4 x i64> %b) { +; RV32-LABEL: select_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: bnez a0, .LBB36_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB36_2: +; RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; RV32-NEXT: vmv.v.x v26, a1 +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vand.vv v28, v8, v26 +; RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; RV32-NEXT: vmv.v.i v30, -1 +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vxor.vv v26, v26, v30 +; RV32-NEXT: vand.vv v26, v10, v26 +; RV32-NEXT: vor.vv v8, v28, v26 +; RV32-NEXT: ret +; +; RV64-LABEL: select_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: bnez a0, .LBB36_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB36_2: +; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV64-NEXT: vand.vx v26, v8, a1 +; RV64-NEXT: vmv.v.x v28, a1 +; RV64-NEXT: vxor.vi v28, v28, -1 +; RV64-NEXT: vand.vv v28, v10, v28 +; RV64-NEXT: vor.vv v8, v26, v28 +; RV64-NEXT: ret + %v = select i1 %c, <4 x i64> %a, <4 x i64> %b + ret <4 x i64> %v +} + +define <4 x i64> @selectcc_v4i64(i64 signext %a, i64 signext %b, <4 x i64> %c, <4 x i64> %d) { +; RV32-LABEL: selectcc_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: xor a1, a1, a3 +; RV32-NEXT: xor a0, a0, a2 +; RV32-NEXT: or a1, a0, a1 +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: bnez a1, .LBB37_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB37_2: +; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; RV32-NEXT: vmv.v.x v26, a0 +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vand.vv v28, v8, v26 +; RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu +; RV32-NEXT: vmv.v.i v30, -1 +; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32-NEXT: vxor.vv v26, v26, v30 +; RV32-NEXT: vand.vv v26, v10, v26 +; RV32-NEXT: vor.vv v8, v28, v26 +; RV32-NEXT: ret +; +; RV64-LABEL: selectcc_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: addi a2, 
zero, -1 +; RV64-NEXT: bne a0, a1, .LBB37_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB37_2: +; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV64-NEXT: vand.vx v26, v8, a2 +; RV64-NEXT: vmv.v.x v28, a2 +; RV64-NEXT: vxor.vi v28, v28, -1 +; RV64-NEXT: vand.vv v28, v10, v28 +; RV64-NEXT: vor.vv v8, v26, v28 +; RV64-NEXT: ret + %cmp = icmp ne i64 %a, %b + %v = select i1 %cmp, <4 x i64> %c, <4 x i64> %d + ret <4 x i64> %v +} + +define <8 x i64> @select_v8i64(i1 zeroext %c, <8 x i64> %a, <8 x i64> %b) { +; RV32-LABEL: select_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: bnez a0, .LBB38_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB38_2: +; RV32-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; RV32-NEXT: vmv.v.x v28, a1 +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vand.vv v8, v8, v28 +; RV32-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; RV32-NEXT: vmv.v.i v16, -1 +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vxor.vv v28, v28, v16 +; RV32-NEXT: vand.vv v28, v12, v28 +; RV32-NEXT: vor.vv v8, v8, v28 +; RV32-NEXT: ret +; +; RV64-LABEL: select_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: bnez a0, .LBB38_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB38_2: +; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV64-NEXT: vand.vx v28, v8, a1 +; RV64-NEXT: vmv.v.x v8, a1 +; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: vand.vv v8, v12, v8 +; RV64-NEXT: vor.vv v8, v28, v8 +; RV64-NEXT: ret + %v = select i1 %c, <8 x i64> %a, <8 x i64> %b + ret <8 x i64> %v +} + +define <8 x i64> @selectcc_v8i64(i64 signext %a, i64 signext %b, <8 x i64> %c, <8 x i64> %d) { +; RV32-LABEL: selectcc_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: xor a1, a1, a3 +; RV32-NEXT: xor a0, a0, a2 +; RV32-NEXT: or a1, a0, a1 +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: bnez a1, .LBB39_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB39_2: +; RV32-NEXT: vsetivli a1, 16, e32,m4,ta,mu +; RV32-NEXT: vmv.v.x v28, a0 +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vand.vv v8, v8, v28 +; RV32-NEXT: vsetivli a0, 16, e32,m4,ta,mu +; RV32-NEXT: vmv.v.i v16, -1 +; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV32-NEXT: vxor.vv v28, v28, v16 +; RV32-NEXT: vand.vv v28, v12, v28 +; RV32-NEXT: vor.vv v8, v8, v28 +; RV32-NEXT: ret +; +; RV64-LABEL: selectcc_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a0, a1, .LBB39_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB39_2: +; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu +; RV64-NEXT: vand.vx v28, v8, a2 +; RV64-NEXT: vmv.v.x v8, a2 +; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: vand.vv v8, v12, v8 +; RV64-NEXT: vor.vv v8, v28, v8 +; RV64-NEXT: ret + %cmp = icmp ne i64 %a, %b + %v = select i1 %cmp, <8 x i64> %c, <8 x i64> %d + ret <8 x i64> %v +} + +define <16 x i64> @select_v16i64(i1 zeroext %c, <16 x i64> %a, <16 x i64> %b) { +; RV32-LABEL: select_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: bnez a0, .LBB40_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB40_2: +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: vsetvli a2, a0, e32,m8,ta,mu +; RV32-NEXT: vmv.v.x v24, a1 +; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vsetvli a0, a0, e32,m8,ta,mu +; RV32-NEXT: vmv.v.i v0, -1 +; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: ret +; +; 
RV64-LABEL: select_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: bnez a0, .LBB40_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB40_2: +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vmv.v.x v24, a1 +; RV64-NEXT: vxor.vi v24, v24, -1 +; RV64-NEXT: vand.vv v16, v16, v24 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: ret + %v = select i1 %c, <16 x i64> %a, <16 x i64> %b + ret <16 x i64> %v +} + +define <16 x i64> @selectcc_v16i64(i64 signext %a, i64 signext %b, <16 x i64> %c, <16 x i64> %d) { +; RV32-LABEL: selectcc_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: xor a1, a1, a3 +; RV32-NEXT: xor a0, a0, a2 +; RV32-NEXT: or a1, a0, a1 +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: bnez a1, .LBB41_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB41_2: +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetvli a2, a1, e32,m8,ta,mu +; RV32-NEXT: vmv.v.x v24, a0 +; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vsetvli a0, a1, e32,m8,ta,mu +; RV32-NEXT: vmv.v.i v0, -1 +; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: selectcc_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a0, a1, .LBB41_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB41_2: +; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vmv.v.x v24, a2 +; RV64-NEXT: vxor.vi v24, v24, -1 +; RV64-NEXT: vand.vv v16, v16, v24 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: ret + %cmp = icmp ne i64 %a, %b + %v = select i1 %cmp, <16 x i64> %c, <16 x i64> %d + ret <16 x i64> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll @@ -0,0 +1,777 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +define @select_nxv1f16(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv1f16(half %a, half %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.h a1, fa0, fa1 +; CHECK-NEXT: addi a0, zero, -1 +; CHECK-NEXT: bnez a1, .LBB1_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a0 +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = fcmp oeq half %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define 
@select_nxv2f16(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB2_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv2f16(half %a, half %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.h a1, fa0, fa1 +; CHECK-NEXT: addi a0, zero, -1 +; CHECK-NEXT: bnez a1, .LBB3_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a0 +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = fcmp oeq half %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv4f16(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv4f16(half %a, half %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.h a1, fa0, fa1 +; CHECK-NEXT: addi a0, zero, -1 +; CHECK-NEXT: bnez a1, .LBB5_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a0 +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = fcmp oeq half %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv8f16(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB6_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vand.vx v26, v8, a1 +; CHECK-NEXT: vmv.v.x v28, a1 +; CHECK-NEXT: vxor.vi v28, v28, -1 +; CHECK-NEXT: vand.vv v28, v10, v28 +; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv8f16(half %a, half %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.h a1, fa0, fa1 +; CHECK-NEXT: addi a0, zero, -1 +; CHECK-NEXT: bnez a1, .LBB7_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; CHECK-NEXT: vand.vx v26, v8, a0 +; CHECK-NEXT: vmv.v.x v28, a0 +; CHECK-NEXT: vxor.vi v28, v28, -1 +; CHECK-NEXT: vand.vv v28, v10, v28 +; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: ret + %cmp = fcmp oeq half %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv16f16(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB8_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero 
+; CHECK-NEXT: .LBB8_2: +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu +; CHECK-NEXT: vand.vx v28, v8, a1 +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vxor.vi v8, v8, -1 +; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv16f16(half %a, half %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.h a1, fa0, fa1 +; CHECK-NEXT: addi a0, zero, -1 +; CHECK-NEXT: bnez a1, .LBB9_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB9_2: +; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu +; CHECK-NEXT: vand.vx v28, v8, a0 +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vxor.vi v8, v8, -1 +; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: ret + %cmp = fcmp oeq half %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv32f16(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vmv.v.x v24, a1 +; CHECK-NEXT: vxor.vi v24, v24, -1 +; CHECK-NEXT: vand.vv v16, v16, v24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv32f16(half %a, half %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.h a1, fa0, fa1 +; CHECK-NEXT: addi a0, zero, -1 +; CHECK-NEXT: bnez a1, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vmv.v.x v24, a0 +; CHECK-NEXT: vxor.vi v24, v24, -1 +; CHECK-NEXT: vand.vv v16, v16, v24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: ret + %cmp = fcmp oeq half %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv1f32(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB12_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB12_2: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv1f32(float %a, float %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.s a1, fa0, fa1 +; CHECK-NEXT: addi a0, zero, -1 +; CHECK-NEXT: bnez a1, .LBB13_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB13_2: +; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a0 +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = fcmp oeq float %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv2f32(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB14_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB14_2: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, 
v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv2f32(float %a, float %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.s a1, fa0, fa1 +; CHECK-NEXT: addi a0, zero, -1 +; CHECK-NEXT: bnez a1, .LBB15_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB15_2: +; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a0 +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = fcmp oeq float %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv4f32(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB16_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB16_2: +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vand.vx v26, v8, a1 +; CHECK-NEXT: vmv.v.x v28, a1 +; CHECK-NEXT: vxor.vi v28, v28, -1 +; CHECK-NEXT: vand.vv v28, v10, v28 +; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv4f32(float %a, float %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.s a1, fa0, fa1 +; CHECK-NEXT: addi a0, zero, -1 +; CHECK-NEXT: bnez a1, .LBB17_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB17_2: +; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu +; CHECK-NEXT: vand.vx v26, v8, a0 +; CHECK-NEXT: vmv.v.x v28, a0 +; CHECK-NEXT: vxor.vi v28, v28, -1 +; CHECK-NEXT: vand.vv v28, v10, v28 +; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: ret + %cmp = fcmp oeq float %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv8f32(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB18_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB18_2: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vand.vx v28, v8, a1 +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vxor.vi v8, v8, -1 +; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv8f32(float %a, float %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.s a1, fa0, fa1 +; CHECK-NEXT: addi a0, zero, -1 +; CHECK-NEXT: bnez a1, .LBB19_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB19_2: +; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; CHECK-NEXT: vand.vx v28, v8, a0 +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vxor.vi v8, v8, -1 +; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: ret + %cmp = fcmp oeq float %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv16f32(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB20_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB20_2: +; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vmv.v.x v24, a1 +; CHECK-NEXT: vxor.vi v24, v24, -1 +; CHECK-NEXT: vand.vv v16, v16, v24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv16f32(float %a, float %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv16f32: +; CHECK: 
# %bb.0: +; CHECK-NEXT: feq.s a1, fa0, fa1 +; CHECK-NEXT: addi a0, zero, -1 +; CHECK-NEXT: bnez a1, .LBB21_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB21_2: +; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vmv.v.x v24, a0 +; CHECK-NEXT: vxor.vi v24, v24, -1 +; CHECK-NEXT: vand.vv v16, v16, v24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: ret + %cmp = fcmp oeq float %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv1f64(i1 zeroext %c, %a, %b) { +; RV32-LABEL: select_nxv1f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: bnez a0, .LBB22_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB22_2: +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vand.vv v26, v8, v25 +; RV32-NEXT: vxor.vi v25, v25, -1 +; RV32-NEXT: vand.vv v25, v9, v25 +; RV32-NEXT: vor.vv v8, v26, v25 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: select_nxv1f64: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: bnez a0, .LBB22_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB22_2: +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vand.vx v25, v8, a1 +; RV64-NEXT: vmv.v.x v26, a1 +; RV64-NEXT: vxor.vi v26, v26, -1 +; RV64-NEXT: vand.vv v26, v9, v26 +; RV64-NEXT: vor.vv v8, v25, v26 +; RV64-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv1f64(double %a, double %b, %c, %d) { +; RV32-LABEL: selectcc_nxv1f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: feq.d a1, fa0, fa1 +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: bnez a1, .LBB23_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB23_2: +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vand.vv v26, v8, v25 +; RV32-NEXT: vxor.vi v25, v25, -1 +; RV32-NEXT: vand.vv v25, v9, v25 +; RV32-NEXT: vor.vv v8, v26, v25 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: selectcc_nxv1f64: +; RV64: # %bb.0: +; RV64-NEXT: feq.d a1, fa0, fa1 +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: bnez a1, .LBB23_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, zero +; RV64-NEXT: .LBB23_2: +; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; RV64-NEXT: vand.vx v25, v8, a0 +; RV64-NEXT: vmv.v.x v26, a0 +; RV64-NEXT: vxor.vi v26, v26, -1 +; RV64-NEXT: vand.vv v26, v9, v26 +; RV64-NEXT: vor.vv v8, v25, v26 +; RV64-NEXT: ret + %cmp = fcmp oeq double %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv2f64(i1 zeroext %c, %a, %b) { +; RV32-LABEL: select_nxv2f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: bnez a0, .LBB24_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB24_2: +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vand.vv v28, v8, v26 +; RV32-NEXT: vxor.vi v26, v26, -1 +; RV32-NEXT: vand.vv v26, v10, v26 +; RV32-NEXT: vor.vv v8, v28, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: select_nxv2f64: +; RV64: # 
%bb.0: +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: bnez a0, .LBB24_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB24_2: +; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV64-NEXT: vand.vx v26, v8, a1 +; RV64-NEXT: vmv.v.x v28, a1 +; RV64-NEXT: vxor.vi v28, v28, -1 +; RV64-NEXT: vand.vv v28, v10, v28 +; RV64-NEXT: vor.vv v8, v26, v28 +; RV64-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv2f64(double %a, double %b, %c, %d) { +; RV32-LABEL: selectcc_nxv2f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: feq.d a1, fa0, fa1 +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: bnez a1, .LBB25_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB25_2: +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vand.vv v28, v8, v26 +; RV32-NEXT: vxor.vi v26, v26, -1 +; RV32-NEXT: vand.vv v26, v10, v26 +; RV32-NEXT: vor.vv v8, v28, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: selectcc_nxv2f64: +; RV64: # %bb.0: +; RV64-NEXT: feq.d a1, fa0, fa1 +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: bnez a1, .LBB25_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, zero +; RV64-NEXT: .LBB25_2: +; RV64-NEXT: vsetvli a1, zero, e64,m2,ta,mu +; RV64-NEXT: vand.vx v26, v8, a0 +; RV64-NEXT: vmv.v.x v28, a0 +; RV64-NEXT: vxor.vi v28, v28, -1 +; RV64-NEXT: vand.vv v28, v10, v28 +; RV64-NEXT: vor.vv v8, v26, v28 +; RV64-NEXT: ret + %cmp = fcmp oeq double %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv4f64(i1 zeroext %c, %a, %b) { +; RV32-LABEL: select_nxv4f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: bnez a0, .LBB26_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB26_2: +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vand.vv v8, v8, v28 +; RV32-NEXT: vxor.vi v28, v28, -1 +; RV32-NEXT: vand.vv v28, v12, v28 +; RV32-NEXT: vor.vv v8, v8, v28 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: select_nxv4f64: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: bnez a0, .LBB26_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB26_2: +; RV64-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV64-NEXT: vand.vx v28, v8, a1 +; RV64-NEXT: vmv.v.x v8, a1 +; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: vand.vv v8, v12, v8 +; RV64-NEXT: vor.vv v8, v28, v8 +; RV64-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv4f64(double %a, double %b, %c, %d) { +; RV32-LABEL: selectcc_nxv4f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: feq.d a1, fa0, fa1 +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: bnez a1, .LBB27_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB27_2: +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vand.vv v8, v8, v28 +; RV32-NEXT: vxor.vi v28, v28, -1 +; RV32-NEXT: vand.vv v28, v12, v28 +; RV32-NEXT: vor.vv v8, v8, v28 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: selectcc_nxv4f64: +; RV64: # %bb.0: +; RV64-NEXT: feq.d a1, fa0, fa1 
+; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: bnez a1, .LBB27_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, zero +; RV64-NEXT: .LBB27_2: +; RV64-NEXT: vsetvli a1, zero, e64,m4,ta,mu +; RV64-NEXT: vand.vx v28, v8, a0 +; RV64-NEXT: vmv.v.x v8, a0 +; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: vand.vv v8, v12, v8 +; RV64-NEXT: vor.vv v8, v28, v8 +; RV64-NEXT: ret + %cmp = fcmp oeq double %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv8f64(i1 zeroext %c, %a, %b) { +; RV32-LABEL: select_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: bnez a0, .LBB28_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB28_2: +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vxor.vi v24, v24, -1 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: select_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: bnez a0, .LBB28_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB28_2: +; RV64-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vmv.v.x v24, a1 +; RV64-NEXT: vxor.vi v24, v24, -1 +; RV64-NEXT: vand.vv v16, v16, v24 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv8f64(double %a, double %b, %c, %d) { +; RV32-LABEL: selectcc_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: feq.d a1, fa0, fa1 +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: bnez a1, .LBB29_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB29_2: +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vxor.vi v24, v24, -1 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: selectcc_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: feq.d a1, fa0, fa1 +; RV64-NEXT: addi a0, zero, -1 +; RV64-NEXT: bnez a1, .LBB29_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, zero +; RV64-NEXT: .LBB29_2: +; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vmv.v.x v24, a0 +; RV64-NEXT: vxor.vi v24, v24, -1 +; RV64-NEXT: vand.vv v16, v16, v24 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: ret + %cmp = fcmp oeq double %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/select-int.ll b/llvm/test/CodeGen/RISCV/rvv/select-int.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/select-int.ll @@ -0,0 +1,1330 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +define @select_nxv1i1(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: bnez a0, .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; 
CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vmandnot.mm v25, v8, v26 +; CHECK-NEXT: vmand.mm v26, v0, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv1i1(i1 signext %a, i1 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: andi a1, a0, 1 +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: bnez a1, .LBB1_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vmandnot.mm v25, v8, v26 +; CHECK-NEXT: vmand.mm v26, v0, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 +; CHECK-NEXT: ret + %cmp = icmp ne i1 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv2i1(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: bnez a0, .LBB2_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vmandnot.mm v25, v8, v26 +; CHECK-NEXT: vmand.mm v26, v0, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv2i1(i1 signext %a, i1 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: andi a1, a0, 1 +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: bnez a1, .LBB3_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vmandnot.mm v25, v8, v26 +; CHECK-NEXT: vmand.mm v26, v0, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 +; CHECK-NEXT: ret + %cmp = icmp ne i1 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv4i1(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: bnez a0, .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vmandnot.mm v25, v8, v26 +; CHECK-NEXT: vmand.mm v26, v0, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv4i1(i1 signext %a, i1 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: andi a1, a0, 1 +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: bnez a1, .LBB5_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vmandnot.mm v25, v8, v26 +; CHECK-NEXT: vmand.mm v26, v0, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 +; CHECK-NEXT: ret + %cmp = icmp ne i1 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv8i1(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: bnez a0, .LBB6_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: 
vmv.v.x v25, a1 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vmandnot.mm v25, v8, v26 +; CHECK-NEXT: vmand.mm v26, v0, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv8i1(i1 signext %a, i1 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: andi a1, a0, 1 +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: bnez a1, .LBB7_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vmandnot.mm v25, v8, v26 +; CHECK-NEXT: vmand.mm v26, v0, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 +; CHECK-NEXT: ret + %cmp = icmp ne i1 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv16i1(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: bnez a0, .LBB8_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB8_2: +; CHECK-NEXT: vsetvli a0, zero, e8,m2,ta,mu +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vmandnot.mm v26, v8, v25 +; CHECK-NEXT: vmand.mm v25, v0, v25 +; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv16i1(i1 signext %a, i1 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: andi a1, a0, 1 +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: bnez a1, .LBB9_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB9_2: +; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu +; CHECK-NEXT: vmv.v.x v26, a0 +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vmandnot.mm v26, v8, v25 +; CHECK-NEXT: vmand.mm v25, v0, v25 +; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i1 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv32i1(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: bnez a0, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: vsetvli a0, zero, e8,m4,ta,mu +; CHECK-NEXT: vmv.v.x v28, a1 +; CHECK-NEXT: vmsne.vi v25, v28, 0 +; CHECK-NEXT: vmandnot.mm v26, v8, v25 +; CHECK-NEXT: vmand.mm v25, v0, v25 +; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv32i1(i1 signext %a, i1 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: andi a1, a0, 1 +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: bnez a1, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu +; CHECK-NEXT: vmv.v.x v28, a0 +; CHECK-NEXT: vmsne.vi v25, v28, 0 +; CHECK-NEXT: vmandnot.mm v26, v8, v25 +; CHECK-NEXT: vmand.mm v25, v0, v25 +; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i1 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv64i1(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: bnez a0, .LBB12_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB12_2: +; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu +; CHECK-NEXT: vmv.v.x v16, a1 +; CHECK-NEXT: vmsne.vi v25, v16, 0 +; CHECK-NEXT: 
vmandnot.mm v26, v8, v25 +; CHECK-NEXT: vmand.mm v25, v0, v25 +; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv64i1(i1 signext %a, i1 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: xor a0, a0, a1 +; CHECK-NEXT: andi a1, a0, 1 +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: bnez a1, .LBB13_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .LBB13_2: +; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vmsne.vi v25, v16, 0 +; CHECK-NEXT: vmandnot.mm v26, v8, v25 +; CHECK-NEXT: vmand.mm v25, v0, v25 +; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i1 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv1i8(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB14_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB14_2: +; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv1i8(i8 signext %a, i8 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB15_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB15_2: +; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a2 +; CHECK-NEXT: vmv.v.x v26, a2 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i8 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv2i8(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB16_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB16_2: +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv2i8(i8 signext %a, i8 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB17_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB17_2: +; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a2 +; CHECK-NEXT: vmv.v.x v26, a2 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i8 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv4i8(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB18_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB18_2: +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv4i8(i8 signext %a, i8 signext %b, %c, 
%d) { +; CHECK-LABEL: selectcc_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB19_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB19_2: +; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a2 +; CHECK-NEXT: vmv.v.x v26, a2 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i8 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv8i8(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB20_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB20_2: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv8i8(i8 signext %a, i8 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB21_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB21_2: +; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a2 +; CHECK-NEXT: vmv.v.x v26, a2 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i8 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv16i8(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB22_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB22_2: +; CHECK-NEXT: vsetvli a0, zero, e8,m2,ta,mu +; CHECK-NEXT: vand.vx v26, v8, a1 +; CHECK-NEXT: vmv.v.x v28, a1 +; CHECK-NEXT: vxor.vi v28, v28, -1 +; CHECK-NEXT: vand.vv v28, v10, v28 +; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv16i8(i8 signext %a, i8 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB23_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB23_2: +; CHECK-NEXT: vsetvli a0, zero, e8,m2,ta,mu +; CHECK-NEXT: vand.vx v26, v8, a2 +; CHECK-NEXT: vmv.v.x v28, a2 +; CHECK-NEXT: vxor.vi v28, v28, -1 +; CHECK-NEXT: vand.vv v28, v10, v28 +; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: ret + %cmp = icmp ne i8 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv32i8(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB24_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB24_2: +; CHECK-NEXT: vsetvli a0, zero, e8,m4,ta,mu +; CHECK-NEXT: vand.vx v28, v8, a1 +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vxor.vi v8, v8, -1 +; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv32i8(i8 signext %a, i8 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB25_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vsetvli a0, zero, e8,m4,ta,mu +; CHECK-NEXT: vand.vx v28, v8, 
a2 +; CHECK-NEXT: vmv.v.x v8, a2 +; CHECK-NEXT: vxor.vi v8, v8, -1 +; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: ret + %cmp = icmp ne i8 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv64i8(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB26_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vmv.v.x v24, a1 +; CHECK-NEXT: vxor.vi v24, v24, -1 +; CHECK-NEXT: vand.vv v16, v16, v24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv64i8(i8 signext %a, i8 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB27_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vmv.v.x v24, a2 +; CHECK-NEXT: vxor.vi v24, v24, -1 +; CHECK-NEXT: vand.vv v16, v16, v24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: ret + %cmp = icmp ne i8 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv1i16(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB28_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB28_2: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv1i16(i16 signext %a, i16 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB29_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB29_2: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a2 +; CHECK-NEXT: vmv.v.x v26, a2 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i16 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv2i16(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB30_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB30_2: +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv2i16(i16 signext %a, i16 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB31_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB31_2: +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a2 +; CHECK-NEXT: vmv.v.x v26, a2 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i16 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv4i16(i1 zeroext 
%c, %a, %b) { +; CHECK-LABEL: select_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB32_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv4i16(i16 signext %a, i16 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB33_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a2 +; CHECK-NEXT: vmv.v.x v26, a2 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i16 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv8i16(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB34_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB34_2: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vand.vx v26, v8, a1 +; CHECK-NEXT: vmv.v.x v28, a1 +; CHECK-NEXT: vxor.vi v28, v28, -1 +; CHECK-NEXT: vand.vv v28, v10, v28 +; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv8i16(i16 signext %a, i16 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB35_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB35_2: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vand.vx v26, v8, a2 +; CHECK-NEXT: vmv.v.x v28, a2 +; CHECK-NEXT: vxor.vi v28, v28, -1 +; CHECK-NEXT: vand.vv v28, v10, v28 +; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: ret + %cmp = icmp ne i16 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv16i16(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB36_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB36_2: +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu +; CHECK-NEXT: vand.vx v28, v8, a1 +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vxor.vi v8, v8, -1 +; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv16i16(i16 signext %a, i16 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB37_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB37_2: +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu +; CHECK-NEXT: vand.vx v28, v8, a2 +; CHECK-NEXT: vmv.v.x v8, a2 +; CHECK-NEXT: vxor.vi v8, v8, -1 +; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: ret + %cmp = icmp ne i16 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv32i16(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB38_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB38_2: +; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu +; 
CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vmv.v.x v24, a1 +; CHECK-NEXT: vxor.vi v24, v24, -1 +; CHECK-NEXT: vand.vv v16, v16, v24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv32i16(i16 signext %a, i16 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB39_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB39_2: +; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vmv.v.x v24, a2 +; CHECK-NEXT: vxor.vi v24, v24, -1 +; CHECK-NEXT: vand.vv v16, v16, v24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: ret + %cmp = icmp ne i16 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv1i32(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB40_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB40_2: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv1i32(i32 signext %a, i32 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB41_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB41_2: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a2 +; CHECK-NEXT: vmv.v.x v26, a2 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i32 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv2i32(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB42_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB42_2: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a1 +; CHECK-NEXT: vmv.v.x v26, a1 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv2i32(i32 signext %a, i32 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB43_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB43_2: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vand.vx v25, v8, a2 +; CHECK-NEXT: vmv.v.x v26, a2 +; CHECK-NEXT: vxor.vi v26, v26, -1 +; CHECK-NEXT: vand.vv v26, v9, v26 +; CHECK-NEXT: vor.vv v8, v25, v26 +; CHECK-NEXT: ret + %cmp = icmp ne i32 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv4i32(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB44_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB44_2: +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vand.vx v26, v8, a1 +; CHECK-NEXT: vmv.v.x v28, a1 +; CHECK-NEXT: vxor.vi v28, v28, -1 +; CHECK-NEXT: vand.vv v28, v10, v28 +; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define 
@selectcc_nxv4i32(i32 signext %a, i32 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB45_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB45_2: +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vand.vx v26, v8, a2 +; CHECK-NEXT: vmv.v.x v28, a2 +; CHECK-NEXT: vxor.vi v28, v28, -1 +; CHECK-NEXT: vand.vv v28, v10, v28 +; CHECK-NEXT: vor.vv v8, v26, v28 +; CHECK-NEXT: ret + %cmp = icmp ne i32 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv8i32(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB46_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB46_2: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vand.vx v28, v8, a1 +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vxor.vi v8, v8, -1 +; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv8i32(i32 signext %a, i32 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB47_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB47_2: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vand.vx v28, v8, a2 +; CHECK-NEXT: vmv.v.x v8, a2 +; CHECK-NEXT: vxor.vi v8, v8, -1 +; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vor.vv v8, v28, v8 +; CHECK-NEXT: ret + %cmp = icmp ne i32 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv16i32(i1 zeroext %c, %a, %b) { +; CHECK-LABEL: select_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: bnez a0, .LBB48_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, zero +; CHECK-NEXT: .LBB48_2: +; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vmv.v.x v24, a1 +; CHECK-NEXT: vxor.vi v24, v24, -1 +; CHECK-NEXT: vand.vv v16, v16, v24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv16i32(i32 signext %a, i32 signext %b, %c, %d) { +; CHECK-LABEL: selectcc_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, -1 +; CHECK-NEXT: bne a0, a1, .LBB49_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, zero +; CHECK-NEXT: .LBB49_2: +; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vmv.v.x v24, a2 +; CHECK-NEXT: vxor.vi v24, v24, -1 +; CHECK-NEXT: vand.vv v16, v16, v24 +; CHECK-NEXT: vor.vv v8, v8, v16 +; CHECK-NEXT: ret + %cmp = icmp ne i32 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv1i64(i1 zeroext %c, %a, %b) { +; RV32-LABEL: select_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: bnez a0, .LBB50_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB50_2: +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vand.vv v26, v8, v25 +; RV32-NEXT: vxor.vi v25, v25, -1 +; RV32-NEXT: vand.vv v25, v9, v25 +; RV32-NEXT: vor.vv v8, v26, v25 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: select_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: bnez a0, .LBB50_2 +; RV64-NEXT: # %bb.1: 
+; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB50_2: +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vand.vx v25, v8, a1 +; RV64-NEXT: vmv.v.x v26, a1 +; RV64-NEXT: vxor.vi v26, v26, -1 +; RV64-NEXT: vand.vv v26, v9, v26 +; RV64-NEXT: vor.vv v8, v25, v26 +; RV64-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv1i64(i64 signext %a, i64 signext %b, %c, %d) { +; RV32-LABEL: selectcc_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: xor a1, a1, a3 +; RV32-NEXT: xor a0, a0, a2 +; RV32-NEXT: or a1, a0, a1 +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: bnez a1, .LBB51_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB51_2: +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vand.vv v26, v8, v25 +; RV32-NEXT: vxor.vi v25, v25, -1 +; RV32-NEXT: vand.vv v25, v9, v25 +; RV32-NEXT: vor.vv v8, v26, v25 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: selectcc_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a0, a1, .LBB51_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB51_2: +; RV64-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64-NEXT: vand.vx v25, v8, a2 +; RV64-NEXT: vmv.v.x v26, a2 +; RV64-NEXT: vxor.vi v26, v26, -1 +; RV64-NEXT: vand.vv v26, v9, v26 +; RV64-NEXT: vor.vv v8, v25, v26 +; RV64-NEXT: ret + %cmp = icmp ne i64 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv2i64(i1 zeroext %c, %a, %b) { +; RV32-LABEL: select_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: bnez a0, .LBB52_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB52_2: +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vand.vv v28, v8, v26 +; RV32-NEXT: vxor.vi v26, v26, -1 +; RV32-NEXT: vand.vv v26, v10, v26 +; RV32-NEXT: vor.vv v8, v28, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: select_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: bnez a0, .LBB52_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB52_2: +; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV64-NEXT: vand.vx v26, v8, a1 +; RV64-NEXT: vmv.v.x v28, a1 +; RV64-NEXT: vxor.vi v28, v28, -1 +; RV64-NEXT: vand.vv v28, v10, v28 +; RV64-NEXT: vor.vv v8, v26, v28 +; RV64-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv2i64(i64 signext %a, i64 signext %b, %c, %d) { +; RV32-LABEL: selectcc_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: xor a1, a1, a3 +; RV32-NEXT: xor a0, a0, a2 +; RV32-NEXT: or a1, a0, a1 +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: bnez a1, .LBB53_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB53_2: +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vand.vv v28, v8, v26 +; RV32-NEXT: vxor.vi v26, v26, -1 +; RV32-NEXT: vand.vv v26, v10, v26 +; RV32-NEXT: vor.vv v8, v28, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: selectcc_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: addi a2, 
zero, -1 +; RV64-NEXT: bne a0, a1, .LBB53_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB53_2: +; RV64-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; RV64-NEXT: vand.vx v26, v8, a2 +; RV64-NEXT: vmv.v.x v28, a2 +; RV64-NEXT: vxor.vi v28, v28, -1 +; RV64-NEXT: vand.vv v28, v10, v28 +; RV64-NEXT: vor.vv v8, v26, v28 +; RV64-NEXT: ret + %cmp = icmp ne i64 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv4i64(i1 zeroext %c, %a, %b) { +; RV32-LABEL: select_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: bnez a0, .LBB54_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB54_2: +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vand.vv v8, v8, v28 +; RV32-NEXT: vxor.vi v28, v28, -1 +; RV32-NEXT: vand.vv v28, v12, v28 +; RV32-NEXT: vor.vv v8, v8, v28 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: select_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: bnez a0, .LBB54_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB54_2: +; RV64-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV64-NEXT: vand.vx v28, v8, a1 +; RV64-NEXT: vmv.v.x v8, a1 +; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: vand.vv v8, v12, v8 +; RV64-NEXT: vor.vv v8, v28, v8 +; RV64-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv4i64(i64 signext %a, i64 signext %b, %c, %d) { +; RV32-LABEL: selectcc_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: xor a1, a1, a3 +; RV32-NEXT: xor a0, a0, a2 +; RV32-NEXT: or a1, a0, a1 +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: bnez a1, .LBB55_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB55_2: +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vand.vv v8, v8, v28 +; RV32-NEXT: vxor.vi v28, v28, -1 +; RV32-NEXT: vand.vv v28, v12, v28 +; RV32-NEXT: vor.vv v8, v8, v28 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: selectcc_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a0, a1, .LBB55_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB55_2: +; RV64-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; RV64-NEXT: vand.vx v28, v8, a2 +; RV64-NEXT: vmv.v.x v8, a2 +; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: vand.vv v8, v12, v8 +; RV64-NEXT: vor.vv v8, v28, v8 +; RV64-NEXT: ret + %cmp = icmp ne i64 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +} + +define @select_nxv8i64(i1 zeroext %c, %a, %b) { +; RV32-LABEL: select_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: bnez a0, .LBB56_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, zero +; RV32-NEXT: .LBB56_2: +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vxor.vi v24, v24, -1 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: select_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, -1 +; RV64-NEXT: bnez a0, 
.LBB56_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, zero +; RV64-NEXT: .LBB56_2: +; RV64-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vmv.v.x v24, a1 +; RV64-NEXT: vxor.vi v24, v24, -1 +; RV64-NEXT: vand.vv v16, v16, v24 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: ret + %v = select i1 %c, %a, %b + ret %v +} + +define @selectcc_nxv8i64(i64 signext %a, i64 signext %b, %c, %d) { +; RV32-LABEL: selectcc_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: xor a1, a1, a3 +; RV32-NEXT: xor a0, a0, a2 +; RV32-NEXT: or a1, a0, a1 +; RV32-NEXT: addi a0, zero, -1 +; RV32-NEXT: bnez a1, .LBB57_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, zero +; RV32-NEXT: .LBB57_2: +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vxor.vi v24, v24, -1 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: selectcc_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: addi a2, zero, -1 +; RV64-NEXT: bne a0, a1, .LBB57_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, zero +; RV64-NEXT: .LBB57_2: +; RV64-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vmv.v.x v24, a2 +; RV64-NEXT: vxor.vi v24, v24, -1 +; RV64-NEXT: vand.vv v16, v16, v24 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: ret + %cmp = icmp ne i64 %a, %b + %v = select i1 %cmp, %c, %d + ret %v +}