diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1124,6 +1124,8 @@
   SDValue Mask, VL;
   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
 
+  unsigned NumElts = Op.getNumOperands();
+
   if (VT.getVectorElementType() == MVT::i1) {
     if (ISD::isBuildVectorAllZeros(Op.getNode())) {
       SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
@@ -1135,6 +1137,71 @@
       return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
     }
 
+    // Lower constant mask BUILD_VECTORs via an integer vector type, in
+    // scalar integer chunks whose bit-width depends on the number of mask
+    // bits and XLEN.
+    // First, determine the most appropriate scalar integer type to use. This
+    // is at most XLenVT, but may be shrunk to a smaller vector element type
+    // according to the size of the final vector - use i8 chunks rather than
+    // XLenVT if we're producing a v8i1. This results in more consistent
+    // codegen across RV32 and RV64.
+    // If we have to use more than one INSERT_VECTOR_ELT then this optimization
+    // is likely to increase code size; avoid performing it in such a case.
+    unsigned NumViaIntegerBits =
+        std::min(std::max(NumElts, 8u), Subtarget.getXLen());
+    if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
+        (!DAG.shouldOptForSize() || NumElts <= NumViaIntegerBits)) {
+      // Now we can create our integer vector type. Note that it may be larger
+      // than the resulting mask type: v4i1 would use v1i8 as its integer type.
+      MVT IntegerViaVecVT =
+          MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
+                           divideCeil(NumElts, NumViaIntegerBits));
+
+      uint64_t Bits = 0;
+      unsigned BitPos = 0, IntegerEltIdx = 0;
+      MVT XLenVT = Subtarget.getXLenVT();
+      SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
+
+      for (unsigned I = 0; I < NumElts; I++, BitPos++) {
+        // Once we accumulate enough bits to fill our scalar type, insert into
+        // our vector and clear our accumulated data.
+        if (I != 0 && I % NumViaIntegerBits == 0) {
+          SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
+          Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
+                            Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
+          Bits = 0;
+          BitPos = 0;
+          IntegerEltIdx++;
+        }
+        SDValue V = Op.getOperand(I);
+        bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
+        Bits |= ((uint64_t)BitValue << BitPos);
+      }
+
+      // Insert the (remaining) scalar value into position in our integer
+      // vector type.
+      SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
+      Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
+                        DAG.getConstant(IntegerEltIdx, DL, XLenVT));
+
+      if (NumElts < NumViaIntegerBits) {
+        // If we're producing a smaller vector than our minimum legal integer
+        // type, bitcast to the equivalent (known-legal) mask type, and extract
+        // our final mask.
+        assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
+        Vec = DAG.getBitcast(MVT::v8i1, Vec);
+        Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
+                          DAG.getConstant(0, DL, XLenVT));
+      } else {
+        // Else we must have produced an integer type with the same size as the
+        // mask type; bitcast for the final result.
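+        // For example, a v16i1 mask is built via a v1i16 integer vector here,
+        // whose 16 bits bitcast directly to the v16i1 result.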
+        assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
+        Vec = DAG.getBitcast(VT, Vec);
+      }
+
+      return Vec;
+    }
+
     return SDValue();
   }
 
@@ -1145,8 +1212,6 @@
     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
   }
 
-  unsigned NumElts = Op.getNumOperands();
-
   // Try and match an index sequence, which we can lower directly to the vid
   // instruction. An all-undef vector is matched by getSplatValue, above.
   if (VT.isInteger()) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -0,0 +1,473 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX1
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX2
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX4
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX4
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX8
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX8
+
+define <3 x i1> @buildvec_mask_v1i1() {
+; CHECK-LABEL: buildvec_mask_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, 2
+; CHECK-NEXT:    vsetivli a1, 1, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    ret
+  ret <3 x i1>
+}
+
+define <3 x i1> @buildvec_mask_optsize_v1i1() optsize {
+; CHECK-LABEL: buildvec_mask_optsize_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, 2
+; CHECK-NEXT:    vsetivli a1, 1, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    ret
+  ret <3 x i1>
+}
+
+define <4 x i1> @buildvec_mask_v4i1() {
+; CHECK-LABEL: buildvec_mask_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, 6
+; CHECK-NEXT:    vsetivli a1, 1, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    ret
+  ret <4 x i1>
+}
+
+define <8 x i1> @buildvec_mask_v8i1() {
+; CHECK-LABEL: buildvec_mask_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, 182
+; CHECK-NEXT:    vsetivli a1, 1, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    ret
+  ret <8 x i1>
+}
+
+define <10 x i1> @buildvec_mask_v10i1() {
+; CHECK-LABEL: buildvec_mask_v10i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, 949
+; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    ret
+  ret <10 x i1>
+}
+
+define <16 x i1> @buildvec_mask_v16i1() {
+; CHECK-RV32-LABEL: buildvec_mask_v16i1:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    lui a0, 11
+; CHECK-RV32-NEXT:    addi a0, a0, 1718
+; CHECK-RV32-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-RV32-NEXT:    vmv.s.x v0, a0
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: buildvec_mask_v16i1:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    lui a0, 11
+; CHECK-RV64-NEXT:    addiw a0, a0, 1718
+; CHECK-RV64-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-RV64-NEXT:    vmv.s.x v0, a0
+; CHECK-RV64-NEXT:    ret
+  ret <16 x i1>
+}
+
+define <16 x i1> @buildvec_mask_v16i1_undefs() {
+; CHECK-LABEL: buildvec_mask_v16i1_undefs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, 1722
+; CHECK-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
+; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    ret
+  ret <16 x i1>
+}
+
+define <32 x i1> @buildvec_mask_v32i1() {
+; RV32-LMULMAX1-LABEL: buildvec_mask_v32i1:
+; RV32-LMULMAX1:       # %bb.0:
+; RV32-LMULMAX1-NEXT:    addi a0, zero, 1776
+; RV32-LMULMAX1-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
+; RV32-LMULMAX1-NEXT:    vmv.s.x v0, a0
+; RV32-LMULMAX1-NEXT:    lui a0, 11
+; RV32-LMULMAX1-NEXT:    addi a0, a0, 1718
+; RV32-LMULMAX1-NEXT:    vmv.s.x v8, a0
+; RV32-LMULMAX1-NEXT:    ret
+;
+; RV64-LMULMAX1-LABEL: buildvec_mask_v32i1:
+; RV64-LMULMAX1:       # %bb.0:
+; RV64-LMULMAX1-NEXT:    addi a0, zero, 1776
+; RV64-LMULMAX1-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
+; RV64-LMULMAX1-NEXT:    vmv.s.x v0, a0
+; RV64-LMULMAX1-NEXT:    lui a0, 11
+; RV64-LMULMAX1-NEXT:    addiw a0, a0, 1718
+; RV64-LMULMAX1-NEXT:    vmv.s.x v8, a0
+; RV64-LMULMAX1-NEXT:    ret
+;
+; RV32-LMULMAX2-LABEL: buildvec_mask_v32i1:
+; RV32-LMULMAX2:       # %bb.0:
+; RV32-LMULMAX2-NEXT:    lui a0, 748384
+; RV32-LMULMAX2-NEXT:    addi a0, a0, 1776
+; RV32-LMULMAX2-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
+; RV32-LMULMAX2-NEXT:    vmv.s.x v0, a0
+; RV32-LMULMAX2-NEXT:    ret
+;
+; RV64-LMULMAX2-LABEL: buildvec_mask_v32i1:
+; RV64-LMULMAX2:       # %bb.0:
+; RV64-LMULMAX2-NEXT:    lui a0, 748384
+; RV64-LMULMAX2-NEXT:    addiw a0, a0, 1776
+; RV64-LMULMAX2-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
+; RV64-LMULMAX2-NEXT:    vmv.s.x v0, a0
+; RV64-LMULMAX2-NEXT:    ret
+;
+; RV32-LMULMAX4-LABEL: buildvec_mask_v32i1:
+; RV32-LMULMAX4:       # %bb.0:
+; RV32-LMULMAX4-NEXT:    lui a0, 748384
+; RV32-LMULMAX4-NEXT:    addi a0, a0, 1776
+; RV32-LMULMAX4-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
+; RV32-LMULMAX4-NEXT:    vmv.s.x v0, a0
+; RV32-LMULMAX4-NEXT:    ret
+;
+; RV64-LMULMAX4-LABEL: buildvec_mask_v32i1:
+; RV64-LMULMAX4:       # %bb.0:
+; RV64-LMULMAX4-NEXT:    lui a0, 748384
+; RV64-LMULMAX4-NEXT:    addiw a0, a0, 1776
+; RV64-LMULMAX4-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
+; RV64-LMULMAX4-NEXT:    vmv.s.x v0, a0
+; RV64-LMULMAX4-NEXT:    ret
+;
+; RV32-LMULMAX8-LABEL: buildvec_mask_v32i1:
+; RV32-LMULMAX8:       # %bb.0:
+; RV32-LMULMAX8-NEXT:    lui a0, 748384
+; RV32-LMULMAX8-NEXT:    addi a0, a0, 1776
+; RV32-LMULMAX8-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
+; RV32-LMULMAX8-NEXT:    vmv.s.x v0, a0
+; RV32-LMULMAX8-NEXT:    ret
+;
+; RV64-LMULMAX8-LABEL: buildvec_mask_v32i1:
+; RV64-LMULMAX8:       # %bb.0:
+; RV64-LMULMAX8-NEXT:    lui a0, 748384
+; RV64-LMULMAX8-NEXT:    addiw a0, a0, 1776
+; RV64-LMULMAX8-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
+; RV64-LMULMAX8-NEXT:    vmv.s.x v0, a0
+; RV64-LMULMAX8-NEXT:    ret
+  ret <32 x i1>
+}
+
+define <64 x i1> @buildvec_mask_v64i1() {
+; RV32-LMULMAX1-LABEL: buildvec_mask_v64i1:
+; RV32-LMULMAX1:       # %bb.0:
+; RV32-LMULMAX1-NEXT:    addi a0, zero, 1776
+; RV32-LMULMAX1-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
+; RV32-LMULMAX1-NEXT:    vmv.s.x v0, a0
+; RV32-LMULMAX1-NEXT:    lui a0, 4
+; RV32-LMULMAX1-NEXT:    addi a0, a0, -1793
+; RV32-LMULMAX1-NEXT:    vmv.s.x v9, a0
+; RV32-LMULMAX1-NEXT:    lui a0, 11
+; RV32-LMULMAX1-NEXT:    addi a0, a0, 1718
+; RV32-LMULMAX1-NEXT:    vmv.s.x v8, a0
+; RV32-LMULMAX1-NEXT:    vmv1r.v v10, v8
+; RV32-LMULMAX1-NEXT:    ret
+;
+; RV64-LMULMAX1-LABEL: buildvec_mask_v64i1:
+; RV64-LMULMAX1:       # %bb.0:
+; RV64-LMULMAX1-NEXT:    addi a0, zero, 1776
+; RV64-LMULMAX1-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
+; RV64-LMULMAX1-NEXT:    vmv.s.x v0, a0
+; RV64-LMULMAX1-NEXT:    lui a0, 4
+; RV64-LMULMAX1-NEXT:    addiw a0, a0, -1793
+; RV64-LMULMAX1-NEXT:    vmv.s.x v9, a0
+; RV64-LMULMAX1-NEXT:    lui a0, 11
+; RV64-LMULMAX1-NEXT:    addiw a0, a0, 1718
+; RV64-LMULMAX1-NEXT:    vmv.s.x v8, a0
+; RV64-LMULMAX1-NEXT:    vmv1r.v v10, v8
+; RV64-LMULMAX1-NEXT:    ret
+;
+; RV32-LMULMAX2-LABEL: buildvec_mask_v64i1:
+; RV32-LMULMAX2:       # %bb.0:
+; RV32-LMULMAX2-NEXT:    lui a0, 748384
+; RV32-LMULMAX2-NEXT:    addi a0, a0, 1776
+; RV32-LMULMAX2-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
+; RV32-LMULMAX2-NEXT:    vmv.s.x v0, a0
+; RV32-LMULMAX2-NEXT:    lui a0, 748388
+; RV32-LMULMAX2-NEXT:    addi a0, a0, -1793
+; RV32-LMULMAX2-NEXT:    vmv.s.x v8, a0
+; RV32-LMULMAX2-NEXT:    ret
+;
+; RV64-LMULMAX2-LABEL: buildvec_mask_v64i1:
+; RV64-LMULMAX2:       # %bb.0:
+; RV64-LMULMAX2-NEXT:    lui a0, 748384
+; RV64-LMULMAX2-NEXT:    addiw a0, a0, 1776
+; RV64-LMULMAX2-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
+; RV64-LMULMAX2-NEXT:    vmv.s.x v0, a0
+; RV64-LMULMAX2-NEXT:    lui a0, 748388
+; RV64-LMULMAX2-NEXT:    addiw a0, a0, -1793
+; RV64-LMULMAX2-NEXT:    vmv.s.x v8, a0
+; RV64-LMULMAX2-NEXT:    ret
+;
+; RV32-LMULMAX4-LABEL: buildvec_mask_v64i1:
+; RV32-LMULMAX4:       # %bb.0:
+; RV32-LMULMAX4-NEXT:    lui a0, 748388
+; RV32-LMULMAX4-NEXT:    addi a0, a0, -1793
+; RV32-LMULMAX4-NEXT:    vsetivli a1, 2, e32,m1,ta,mu
+; RV32-LMULMAX4-NEXT:    vmv.s.x v25, a0
+; RV32-LMULMAX4-NEXT:    lui a0, 748384
+; RV32-LMULMAX4-NEXT:    addi a0, a0, 1776
+; RV32-LMULMAX4-NEXT:    vmv.s.x v0, a0
+; RV32-LMULMAX4-NEXT:    vsetivli a0, 2, e32,m1,tu,mu
+; RV32-LMULMAX4-NEXT:    vslideup.vi v0, v25, 1
+; RV32-LMULMAX4-NEXT:    ret
+;
+; RV64-LMULMAX4-LABEL: buildvec_mask_v64i1:
+; RV64-LMULMAX4:       # %bb.0:
+; RV64-LMULMAX4-NEXT:    lui a0, 748388
+; RV64-LMULMAX4-NEXT:    addiw a0, a0, -257
+; RV64-LMULMAX4-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
+; RV64-LMULMAX4-NEXT:    vmv.s.x v0, a0
+; RV64-LMULMAX4-NEXT:    ret
+;
+; RV32-LMULMAX8-LABEL: buildvec_mask_v64i1:
+; RV32-LMULMAX8:       # %bb.0:
+; RV32-LMULMAX8-NEXT:    lui a0, 748388
+; RV32-LMULMAX8-NEXT:    addi a0, a0, -1793
+; RV32-LMULMAX8-NEXT:    vsetivli a1, 2, e32,m1,ta,mu
+; RV32-LMULMAX8-NEXT:    vmv.s.x v25, a0
+; RV32-LMULMAX8-NEXT:    lui a0, 748384
+; RV32-LMULMAX8-NEXT:    addi a0, a0, 1776
+; RV32-LMULMAX8-NEXT:    vmv.s.x v0, a0
+; RV32-LMULMAX8-NEXT:    vsetivli a0, 2, e32,m1,tu,mu
+; RV32-LMULMAX8-NEXT:    vslideup.vi v0, v25, 1
+; RV32-LMULMAX8-NEXT:    ret
+;
+; RV64-LMULMAX8-LABEL: buildvec_mask_v64i1:
+; RV64-LMULMAX8:       # %bb.0:
+; RV64-LMULMAX8-NEXT:    lui a0, 748388
+; RV64-LMULMAX8-NEXT:    addiw a0, a0, -257
+; RV64-LMULMAX8-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
+; RV64-LMULMAX8-NEXT:    vmv.s.x v0, a0
+; RV64-LMULMAX8-NEXT:    ret
+  ret <64 x i1>
+}
+
+define <128 x i1> @buildvec_mask_v128i1() {
+; RV32-LMULMAX1-LABEL: buildvec_mask_v128i1:
+; RV32-LMULMAX1:       # %bb.0:
+; RV32-LMULMAX1-NEXT:    addi a0, zero, 1776
+; RV32-LMULMAX1-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
+; RV32-LMULMAX1-NEXT:    vmv.s.x v0, a0
+; RV32-LMULMAX1-NEXT:    lui a0, 4
+; RV32-LMULMAX1-NEXT:    addi a0, a0, -1793
+; RV32-LMULMAX1-NEXT:    vmv.s.x v9, a0
+; RV32-LMULMAX1-NEXT:    lui a0, 11
+; RV32-LMULMAX1-NEXT:    addi a0, a0, 1718
+; RV32-LMULMAX1-NEXT:    vmv.s.x v8, a0
+; RV32-LMULMAX1-NEXT:    vmv1r.v v10, v8
+; RV32-LMULMAX1-NEXT:    vmv1r.v v11, v0
+; RV32-LMULMAX1-NEXT:    vmv1r.v v12, v8
+; RV32-LMULMAX1-NEXT:    vmv1r.v v13, v9
+; RV32-LMULMAX1-NEXT:    vmv1r.v v14, v8
+; RV32-LMULMAX1-NEXT:    ret
+;
+; RV64-LMULMAX1-LABEL: buildvec_mask_v128i1:
+; RV64-LMULMAX1:       # %bb.0:
+; RV64-LMULMAX1-NEXT:    addi a0, zero, 1776
+; RV64-LMULMAX1-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
+; RV64-LMULMAX1-NEXT:    vmv.s.x v0, a0
+; RV64-LMULMAX1-NEXT:    lui a0, 4
+; RV64-LMULMAX1-NEXT:    addiw a0, a0, -1793
+; RV64-LMULMAX1-NEXT:    vmv.s.x v9, a0
+; RV64-LMULMAX1-NEXT:    lui a0, 11
+; RV64-LMULMAX1-NEXT:    addiw a0, a0, 1718
+; RV64-LMULMAX1-NEXT:    vmv.s.x v8, a0
+; RV64-LMULMAX1-NEXT:    vmv1r.v v10, v8
+; RV64-LMULMAX1-NEXT:    vmv1r.v v11, v0
+; RV64-LMULMAX1-NEXT:    vmv1r.v v12, v8
+; RV64-LMULMAX1-NEXT:    vmv1r.v v13, v9
+; RV64-LMULMAX1-NEXT:    vmv1r.v v14, v8
+; RV64-LMULMAX1-NEXT:    ret
+;
+; RV32-LMULMAX2-LABEL: buildvec_mask_v128i1:
+; RV32-LMULMAX2:       # %bb.0:
+; RV32-LMULMAX2-NEXT:    lui a0, 748384
+; RV32-LMULMAX2-NEXT:    addi a0, a0, 1776
+; RV32-LMULMAX2-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
+; RV32-LMULMAX2-NEXT:    vmv.s.x v0, a0
+; RV32-LMULMAX2-NEXT:    lui a0, 748388
+; RV32-LMULMAX2-NEXT:    addi a0, a0, -1793
+; RV32-LMULMAX2-NEXT:    vmv.s.x v8, a0
+; RV32-LMULMAX2-NEXT:    vmv1r.v v9, v0
+; RV32-LMULMAX2-NEXT:    vmv1r.v v10, v8
+; RV32-LMULMAX2-NEXT:    ret
+;
+; RV64-LMULMAX2-LABEL: buildvec_mask_v128i1:
+; RV64-LMULMAX2:       # %bb.0:
+; RV64-LMULMAX2-NEXT:    lui a0, 748384
+; RV64-LMULMAX2-NEXT:    addiw a0, a0, 1776
+; RV64-LMULMAX2-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
+; RV64-LMULMAX2-NEXT:    vmv.s.x v0, a0
+; RV64-LMULMAX2-NEXT:    lui a0, 748388
+; RV64-LMULMAX2-NEXT:    addiw a0, a0, -1793
+; RV64-LMULMAX2-NEXT:    vmv.s.x v8, a0
+; RV64-LMULMAX2-NEXT:    vmv1r.v v9, v0
+; RV64-LMULMAX2-NEXT:    vmv1r.v v10, v8
+; RV64-LMULMAX2-NEXT:    ret
+;
+; RV32-LMULMAX4-LABEL: buildvec_mask_v128i1:
+; RV32-LMULMAX4:       # %bb.0:
+; RV32-LMULMAX4-NEXT:    lui a0, 748388
+; RV32-LMULMAX4-NEXT:    addi a0, a0, -1793
+; RV32-LMULMAX4-NEXT:    vsetivli a1, 2, e32,m1,ta,mu
+; RV32-LMULMAX4-NEXT:    vmv.s.x v25, a0
+; RV32-LMULMAX4-NEXT:    lui a0, 748384
+; RV32-LMULMAX4-NEXT:    addi a0, a0, 1776
+; RV32-LMULMAX4-NEXT:    vmv.s.x v0, a0
+; RV32-LMULMAX4-NEXT:    vsetivli a0, 2, e32,m1,tu,mu
+; RV32-LMULMAX4-NEXT:    vslideup.vi v0, v25, 1
+; RV32-LMULMAX4-NEXT:    vmv1r.v v8, v0
+; RV32-LMULMAX4-NEXT:    ret
+;
+; RV64-LMULMAX4-LABEL: buildvec_mask_v128i1:
+; RV64-LMULMAX4:       # %bb.0:
+; RV64-LMULMAX4-NEXT:    lui a0, 748388
+; RV64-LMULMAX4-NEXT:    addiw a0, a0, -257
+; RV64-LMULMAX4-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
+; RV64-LMULMAX4-NEXT:    vmv.s.x v0, a0
+; RV64-LMULMAX4-NEXT:    vmv1r.v v8, v0
+; RV64-LMULMAX4-NEXT:    ret
+;
+; RV32-LMULMAX8-LABEL: buildvec_mask_v128i1:
+; RV32-LMULMAX8:       # %bb.0:
+; RV32-LMULMAX8-NEXT:    lui a0, 748388
+; RV32-LMULMAX8-NEXT:    addi a0, a0, -1793
+; RV32-LMULMAX8-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; RV32-LMULMAX8-NEXT:    vmv.s.x v25, a0
+; RV32-LMULMAX8-NEXT:    lui a0, 748384
+; RV32-LMULMAX8-NEXT:    addi a0, a0, 1776
+; RV32-LMULMAX8-NEXT:    vmv.s.x v26, a0
+; RV32-LMULMAX8-NEXT:    vsetivli a0, 2, e32,m1,tu,mu
+; RV32-LMULMAX8-NEXT:    vmv1r.v v0, v26
+; RV32-LMULMAX8-NEXT:    vslideup.vi v0, v25, 1
+; RV32-LMULMAX8-NEXT:    vsetivli a0, 3, e32,m1,tu,mu
+; RV32-LMULMAX8-NEXT:    vslideup.vi v0, v26, 2
+; RV32-LMULMAX8-NEXT:    vsetivli a0, 4, e32,m1,tu,mu
+; RV32-LMULMAX8-NEXT:    vslideup.vi v0, v25, 3
+; RV32-LMULMAX8-NEXT:    ret
+;
+; RV64-LMULMAX8-LABEL: buildvec_mask_v128i1:
+; RV64-LMULMAX8:       # %bb.0:
+; RV64-LMULMAX8-NEXT:    lui a0, 748388
+; RV64-LMULMAX8-NEXT:    addiw a0, a0, -257
+; RV64-LMULMAX8-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; RV64-LMULMAX8-NEXT:    vmv.s.x v25, a0
+; RV64-LMULMAX8-NEXT:    vsetivli a0, 2, e64,m1,tu,mu
+; RV64-LMULMAX8-NEXT:    vmv1r.v v0, v25
+; RV64-LMULMAX8-NEXT:    vslideup.vi v0, v25, 1
+; RV64-LMULMAX8-NEXT:    ret
+  ret <128 x i1>
+}
+
+define <128 x i1> @buildvec_mask_optsize_v128i1() optsize {
+; RV32-LMULMAX1-LABEL: buildvec_mask_optsize_v128i1:
+; RV32-LMULMAX1:       # %bb.0:
+; RV32-LMULMAX1-NEXT:    addi a0, zero, 1776
+; RV32-LMULMAX1-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
+; RV32-LMULMAX1-NEXT:    vmv.s.x v0, a0
+; RV32-LMULMAX1-NEXT:    lui a0, 4
+; RV32-LMULMAX1-NEXT:    addi a0, a0, -1793
+; RV32-LMULMAX1-NEXT:    vmv.s.x v9, a0
+; RV32-LMULMAX1-NEXT:    lui a0, 11
+; RV32-LMULMAX1-NEXT:    addi a0, a0, 1718
+; RV32-LMULMAX1-NEXT:    vmv.s.x v8, a0
+; RV32-LMULMAX1-NEXT:    vmv1r.v v10, v8
+; RV32-LMULMAX1-NEXT:    vmv1r.v v11, v0
+; RV32-LMULMAX1-NEXT:    vmv1r.v v12, v8
+; RV32-LMULMAX1-NEXT:    vmv1r.v v13, v9
+; RV32-LMULMAX1-NEXT:    vmv1r.v v14, v8
+; RV32-LMULMAX1-NEXT:    ret
+;
+; RV64-LMULMAX1-LABEL: buildvec_mask_optsize_v128i1:
+; RV64-LMULMAX1:       # %bb.0:
+; RV64-LMULMAX1-NEXT:    addi a0, zero, 1776
+; RV64-LMULMAX1-NEXT:    vsetivli a1, 1, e16,m1,ta,mu
+; RV64-LMULMAX1-NEXT:    vmv.s.x v0, a0
+; RV64-LMULMAX1-NEXT:    lui a0, 4
+; RV64-LMULMAX1-NEXT:    addiw a0, a0, -1793
+; RV64-LMULMAX1-NEXT:    vmv.s.x v9, a0
+; RV64-LMULMAX1-NEXT:    lui a0, 11
+; RV64-LMULMAX1-NEXT:    addiw a0, a0, 1718
+; RV64-LMULMAX1-NEXT:    vmv.s.x v8, a0
+; RV64-LMULMAX1-NEXT:    vmv1r.v v10, v8
+; RV64-LMULMAX1-NEXT:    vmv1r.v v11, v0
+; RV64-LMULMAX1-NEXT:    vmv1r.v v12, v8
+; RV64-LMULMAX1-NEXT:    vmv1r.v v13, v9
+; RV64-LMULMAX1-NEXT:    vmv1r.v v14, v8
+; RV64-LMULMAX1-NEXT:    ret
+;
+; RV32-LMULMAX2-LABEL: buildvec_mask_optsize_v128i1:
+; RV32-LMULMAX2:       # %bb.0:
+; RV32-LMULMAX2-NEXT:    lui a0, 748384
+; RV32-LMULMAX2-NEXT:    addi a0, a0, 1776
+; RV32-LMULMAX2-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
+; RV32-LMULMAX2-NEXT:    vmv.s.x v0, a0
+; RV32-LMULMAX2-NEXT:    lui a0, 748388
+; RV32-LMULMAX2-NEXT:    addi a0, a0, -1793
+; RV32-LMULMAX2-NEXT:    vmv.s.x v8, a0
+; RV32-LMULMAX2-NEXT:    vmv1r.v v9, v0
+; RV32-LMULMAX2-NEXT:    vmv1r.v v10, v8
+; RV32-LMULMAX2-NEXT:    ret
+;
+; RV64-LMULMAX2-LABEL: buildvec_mask_optsize_v128i1:
+; RV64-LMULMAX2:       # %bb.0:
+; RV64-LMULMAX2-NEXT:    lui a0, 748384
+; RV64-LMULMAX2-NEXT:    addiw a0, a0, 1776
+; RV64-LMULMAX2-NEXT:    vsetivli a1, 1, e32,m1,ta,mu
+; RV64-LMULMAX2-NEXT:    vmv.s.x v0, a0
+; RV64-LMULMAX2-NEXT:    lui a0, 748388
+; RV64-LMULMAX2-NEXT:    addiw a0, a0, -1793
+; RV64-LMULMAX2-NEXT:    vmv.s.x v8, a0
+; RV64-LMULMAX2-NEXT:    vmv1r.v v9, v0
+; RV64-LMULMAX2-NEXT:    vmv1r.v v10, v8
+; RV64-LMULMAX2-NEXT:    ret
+;
+; RV32-LMULMAX4-LABEL: buildvec_mask_optsize_v128i1:
+; RV32-LMULMAX4:       # %bb.0:
+; RV32-LMULMAX4-NEXT:    lui a0, %hi(.LCPI10_0)
+; RV32-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI10_0)
+; RV32-LMULMAX4-NEXT:    addi a1, zero, 64
+; RV32-LMULMAX4-NEXT:    vsetvli a1, a1, e8,m4,ta,mu
+; RV32-LMULMAX4-NEXT:    vle1.v v0, (a0)
+; RV32-LMULMAX4-NEXT:    vmv1r.v v8, v0
+; RV32-LMULMAX4-NEXT:    ret
+;
+; RV64-LMULMAX4-LABEL: buildvec_mask_optsize_v128i1:
+; RV64-LMULMAX4:       # %bb.0:
+; RV64-LMULMAX4-NEXT:    lui a0, 748388
+; RV64-LMULMAX4-NEXT:    addiw a0, a0, -257
+; RV64-LMULMAX4-NEXT:    vsetivli a1, 1, e64,m1,ta,mu
+; RV64-LMULMAX4-NEXT:    vmv.s.x v0, a0
+; RV64-LMULMAX4-NEXT:    vmv1r.v v8, v0
+; RV64-LMULMAX4-NEXT:    ret
+;
+; RV32-LMULMAX8-LABEL: buildvec_mask_optsize_v128i1:
+; RV32-LMULMAX8:       # %bb.0:
+; RV32-LMULMAX8-NEXT:    lui a0, %hi(.LCPI10_0)
+; RV32-LMULMAX8-NEXT:    addi a0, a0, %lo(.LCPI10_0)
+; RV32-LMULMAX8-NEXT:    addi a1, zero, 128
+; RV32-LMULMAX8-NEXT:    vsetvli a1, a1, e8,m8,ta,mu
+; RV32-LMULMAX8-NEXT:    vle1.v v0, (a0)
+; RV32-LMULMAX8-NEXT:    ret
+;
+; RV64-LMULMAX8-LABEL: buildvec_mask_optsize_v128i1:
+; RV64-LMULMAX8:       # %bb.0:
+; RV64-LMULMAX8-NEXT:    lui a0, %hi(.LCPI10_0)
+; RV64-LMULMAX8-NEXT:    addi a0, a0, %lo(.LCPI10_0)
+; RV64-LMULMAX8-NEXT:    addi a1, zero, 128
+; RV64-LMULMAX8-NEXT:    vsetvli a1, a1, e8,m8,ta,mu
+; RV64-LMULMAX8-NEXT:    vle1.v v0, (a0)
+; RV64-LMULMAX8-NEXT:    ret
+  ret <128 x i1>
+}
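
For reference, the bit-packing performed by the new lowering can be sketched as a small standalone C++ helper. This is not part of the patch; the function name packMaskBits and the use of plain std::vector are illustrative only. It mirrors the loop in lowerBUILD_VECTOR above: mask bits are accumulated LSB-first into chunks of min(max(NumElts, 8), XLEN) bits, and one chunk is materialized per element of the integer vector.

#include <algorithm>
#include <cstdint>
#include <vector>

// Pack constant mask bits into integer chunks, LSB-first, using the same
// chunk width the lowering chooses: min(max(NumElts, 8), XLen).
std::vector<uint64_t> packMaskBits(const std::vector<bool> &MaskBits,
                                   unsigned XLen) {
  unsigned NumElts = static_cast<unsigned>(MaskBits.size());
  unsigned NumViaIntegerBits = std::min(std::max(NumElts, 8u), XLen);
  std::vector<uint64_t> Chunks;
  uint64_t Bits = 0;
  unsigned BitPos = 0;
  for (unsigned I = 0; I < NumElts; I++, BitPos++) {
    // Once a chunk is full, emit it and start accumulating the next one.
    if (I != 0 && I % NumViaIntegerBits == 0) {
      Chunks.push_back(Bits);
      Bits = 0;
      BitPos = 0;
    }
    Bits |= (uint64_t)MaskBits[I] << BitPos;
  }
  Chunks.push_back(Bits);
  return Chunks;
}

For example, packMaskBits({0, 1, 1, 0, 1, 1, 0, 1}, 64) returns {182}, matching the "addi a0, zero, 182" materialized for buildvec_mask_v8i1 above, and a 4-element mask still packs into a single 8-bit chunk, which is why the lowering builds a v1i8, bitcasts it to v8i1, and extracts the low v4i1.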