diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1367,12 +1367,15 @@
     // according to the size of the final vector - use i8 chunks rather than
     // XLenVT if we're producing a v8i1. This results in more consistent
     // codegen across RV32 and RV64.
-    // If we have to use more than one INSERT_VECTOR_ELT then this optimization
-    // is likely to increase code size; avoid peforming it in such a case.
     unsigned NumViaIntegerBits =
         std::min(std::max(NumElts, 8u), Subtarget.getXLen());
-    if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
-        (!DAG.shouldOptForSize() || NumElts <= NumViaIntegerBits)) {
+    if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
+      // If we have to use more than one INSERT_VECTOR_ELT then this
+      // optimization is likely to increase code size; avoid performing it in
+      // such a case. We can go through the stack as long as we're at least
+      // byte-sized.
+      if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
+        return SDValue();
       // Now we can create our integer vector type. Note that it may be larger
       // than the resulting mask type: v4i1 would use v1i8 as its integer type.
       MVT IntegerViaVecVT =
@@ -1427,20 +1430,29 @@
       return Vec;
     }
 
-    // A splat can be lowered as a SETCC. For each fixed-length mask vector
-    // type, we have a legal equivalently-sized i8 type, so we can use that.
+    // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
+    // vector type, we have a legal equivalently-sized i8 type, so we can use
+    // that.
+    MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
+    SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
+
+    SDValue WideVec;
     if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
+      // For a splat, perform a scalar truncate before creating the wider
+      // vector.
       assert(Splat.getValueType() == XLenVT &&
              "Unexpected type for i1 splat value");
-      MVT InterVT = VT.changeVectorElementType(MVT::i8);
       Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
                           DAG.getConstant(1, DL, XLenVT));
-      Splat = DAG.getSplatBuildVector(InterVT, DL, Splat);
-      SDValue Zero = DAG.getConstant(0, DL, InterVT);
-      return DAG.getSetCC(DL, VT, Splat, Zero, ISD::SETNE);
+      WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
+    } else {
+      SmallVector<SDValue, 8> Ops(Op->op_values());
+      WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
+      SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
+      WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
     }
 
-    return SDValue();
+    return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
   }
 
   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -8,6 +8,64 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX8
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX8
 
+define <1 x i1> @buildvec_mask_nonconst_v1i1(i1 %x) {
+; CHECK-LABEL: buildvec_mask_nonconst_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    vsetivli a1, 1, e8,mf8,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    ret
+  %1 = insertelement <1 x i1> undef, i1 %x, i32 0
+  ret <1 x i1> %1
+}
+
+define <1 x i1> @buildvec_mask_optsize_nonconst_v1i1(i1 %x) optsize {
+; CHECK-LABEL: buildvec_mask_optsize_nonconst_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    vsetivli a1, 1, e8,mf8,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    ret
+  %1 = insertelement <1 x i1> undef, i1 %x, i32 0
+  ret <1 x i1> %1
+}
+
+define <2 x i1> @buildvec_mask_nonconst_v2i1(i1 %x, i1 %y) {
+; CHECK-LABEL: buildvec_mask_nonconst_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a2, 2, e8,mf8,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vmv.s.x v25, a0
+; CHECK-NEXT:    vand.vi v25, v25, 1
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    ret
+  %1 = insertelement <2 x i1> undef, i1 %x, i32 0
+  %2 = insertelement <2 x i1> %1, i1 %y, i32 1
+  ret <2 x i1> %2
+}
+
+; FIXME: optsize isn't smaller than the code above
+define <2 x i1> @buildvec_mask_optsize_nonconst_v2i1(i1 %x, i1 %y) optsize {
+; CHECK-LABEL: buildvec_mask_optsize_nonconst_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    sb a1, 15(sp)
+; CHECK-NEXT:    sb a0, 14(sp)
+; CHECK-NEXT:    vsetivli a0, 2, e8,mf8,ta,mu
+; CHECK-NEXT:    addi a0, sp, 14
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vand.vi v25, v25, 1
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %1 = insertelement <2 x i1> undef, i1 %x, i32 0
+  %2 = insertelement <2 x i1> %1, i1 %y, i32 1
+  ret <2 x i1> %2
+}
+
 define <3 x i1> @buildvec_mask_v1i1() {
 ; CHECK-LABEL: buildvec_mask_v1i1:
 ; CHECK:       # %bb.0:
@@ -38,6 +96,73 @@
   ret <4 x i1> 
 }
 
+define <4 x i1> @buildvec_mask_nonconst_v4i1(i1 %x, i1 %y) {
+; CHECK-LABEL: buildvec_mask_nonconst_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 3
+; CHECK-NEXT:    vsetivli a3, 1, e8,mf8,ta,mu
+; CHECK-NEXT:    vmv.s.x v0, a2
+; CHECK-NEXT:    vsetivli a2, 4, e8,mf4,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vmerge.vxm v25, v25, a0, v0
+; CHECK-NEXT:    vand.vi v25, v25, 1
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    ret
+  %1 = insertelement <4 x i1> undef, i1 %x, i32 0
+  %2 = insertelement <4 x i1> %1, i1 %x, i32 1
+  %3 = insertelement <4 x i1> %2, i1 %y, i32 2
+  %4 = insertelement <4 x i1> %3, i1 %y, i32 3
+  ret <4 x i1> %4
+}
+
+; FIXME: optsize isn't smaller than the code above
+define <4 x i1> @buildvec_mask_optsize_nonconst_v4i1(i1 %x, i1 %y) optsize {
+; CHECK-LABEL: buildvec_mask_optsize_nonconst_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    sb a1, 15(sp)
+; CHECK-NEXT:    sb a1, 14(sp)
+; CHECK-NEXT:    sb a0, 13(sp)
+; CHECK-NEXT:    sb a0, 12(sp)
+; CHECK-NEXT:    vsetivli a0, 4, e8,mf4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 12
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vand.vi v25, v25, 1
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %1 = insertelement <4 x i1> undef, i1 %x, i32 0
+  %2 = insertelement <4 x i1> %1, i1 %x, i32 1
+  %3 = insertelement <4 x i1> %2, i1 %y, i32 2
+  %4 = insertelement <4 x i1> %3, i1 %y, i32 3
+  ret <4 x i1> %4
+}
+
+define <4 x i1> @buildvec_mask_nonconst_v4i1_2(i1 %x, i1 %y) {
+; CHECK-LABEL: buildvec_mask_nonconst_v4i1_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    sb a1, 15(sp)
+; CHECK-NEXT:    addi a1, zero, 1
+; CHECK-NEXT:    sb a1, 14(sp)
+; CHECK-NEXT:    sb a0, 13(sp)
+; CHECK-NEXT:    sb zero, 12(sp)
+; CHECK-NEXT:    vsetivli a0, 4, e8,mf4,ta,mu
+; CHECK-NEXT:    addi a0, sp, 12
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vand.vi v25, v25, 1
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %1 = insertelement <4 x i1> undef, i1 0, i32 0
+  %2 = insertelement <4 x i1> %1, i1 %x, i32 1
+  %3 = insertelement <4 x i1> %2, i1 1, i32 2
+  %4 = insertelement <4 x i1> %3, i1 %y, i32 3
+  ret <4 x i1> %4
+}
+
 define <8 x i1> @buildvec_mask_v8i1() {
 ; CHECK-LABEL: buildvec_mask_v8i1:
 ; CHECK:       # %bb.0:
@@ -48,6 +173,124 @@
   ret <8 x i1> 
 }
 
+define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) {
+; CHECK-LABEL: buildvec_mask_nonconst_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, zero, 19
+; CHECK-NEXT:    vsetivli a3, 1, e8,mf8,ta,mu
+; CHECK-NEXT:    vmv.s.x v0, a2
+; CHECK-NEXT:    vsetivli a2, 8, e8,mf2,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vmerge.vxm v25, v25, a0, v0
+; CHECK-NEXT:    vand.vi v25, v25, 1
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    ret
+  %1 = insertelement <8 x i1> undef, i1 %x, i32 0
+  %2 = insertelement <8 x i1> %1, i1 %x, i32 1
+  %3 = insertelement <8 x i1> %2, i1 %y, i32 2
+  %4 = insertelement <8 x i1> %3, i1 %y, i32 3
+  %5 = insertelement <8 x i1> %4, i1 %x, i32 4
+  %6 = insertelement <8 x i1> %5, i1 %y, i32 5
+  %7 = insertelement <8 x i1> %6, i1 %y, i32 6
+  %8 = insertelement <8 x i1> %7, i1 %y, i32 7
+  ret <8 x i1> %8
+}
+
+define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) {
+; CHECK-LABEL: buildvec_mask_nonconst_v8i1_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    sb a2, 15(sp)
+; CHECK-NEXT:    sb zero, 14(sp)
+; CHECK-NEXT:    sb a3, 13(sp)
+; CHECK-NEXT:    sb a0, 12(sp)
+; CHECK-NEXT:    sb a1, 11(sp)
+; CHECK-NEXT:    addi a1, zero, 1
+; CHECK-NEXT:    sb a1, 10(sp)
+; CHECK-NEXT:    sb a0, 9(sp)
+; CHECK-NEXT:    sb a0, 8(sp)
+; CHECK-NEXT:    vsetivli a0, 8, e8,mf2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vand.vi v25, v25, 1
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %1 = insertelement <8 x i1> undef, i1 %x, i32 0
+  %2 = insertelement <8 x i1> %1, i1 %x, i32 1
+  %3 = insertelement <8 x i1> %2, i1 1, i32 2
+  %4 = insertelement <8 x i1> %3, i1 %y, i32 3
+  %5 = insertelement <8 x i1> %4, i1 %x, i32 4
+  %6 = insertelement <8 x i1> %5, i1 %w, i32 5
+  %7 = insertelement <8 x i1> %6, i1 0, i32 6
+  %8 = insertelement <8 x i1> %7, i1 %z, i32 7
+  ret <8 x i1> %8
+}
+
+define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) optsize {
+; CHECK-LABEL: buildvec_mask_optsize_nonconst_v8i1_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    sb a2, 15(sp)
+; CHECK-NEXT:    sb zero, 14(sp)
+; CHECK-NEXT:    sb a3, 13(sp)
+; CHECK-NEXT:    sb a0, 12(sp)
+; CHECK-NEXT:    sb a1, 11(sp)
+; CHECK-NEXT:    addi a1, zero, 1
+; CHECK-NEXT:    sb a1, 10(sp)
+; CHECK-NEXT:    sb a0, 9(sp)
+; CHECK-NEXT:    sb a0, 8(sp)
+; CHECK-NEXT:    vsetivli a0, 8, e8,mf2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vand.vi v25, v25, 1
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %1 = insertelement <8 x i1> undef, i1 %x, i32 0
+  %2 = insertelement <8 x i1> %1, i1 %x, i32 1
+  %3 = insertelement <8 x i1> %2, i1 1, i32 2
+  %4 = insertelement <8 x i1> %3, i1 %y, i32 3
+  %5 = insertelement <8 x i1> %4, i1 %x, i32 4
+  %6 = insertelement <8 x i1> %5, i1 %w, i32 5
+  %7 = insertelement <8 x i1> %6, i1 0, i32 6
+  %8 = insertelement <8 x i1> %7, i1 %z, i32 7
+  ret <8 x i1> %8
+}
+
+define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize {
+; CHECK-LABEL: buildvec_mask_optsize_nonconst_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    sb a1, 15(sp)
+; CHECK-NEXT:    sb a1, 14(sp)
+; CHECK-NEXT:    sb a1, 13(sp)
+; CHECK-NEXT:    sb a0, 12(sp)
+; CHECK-NEXT:    sb a1, 11(sp)
+; CHECK-NEXT:    sb a1, 10(sp)
+; CHECK-NEXT:    sb a0, 9(sp)
+; CHECK-NEXT:    sb a0, 8(sp)
+; CHECK-NEXT:    vsetivli a0, 8, e8,mf2,ta,mu
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vand.vi v25, v25, 1
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %1 = insertelement <8 x i1> undef, i1 %x, i32 0
+  %2 = insertelement <8 x i1> %1, i1 %x, i32 1
+  %3 = insertelement <8 x i1> %2, i1 %y, i32 2
+  %4 = insertelement <8 x i1> %3, i1 %y, i32 3
+  %5 = insertelement <8 x i1> %4, i1 %x, i32 4
+  %6 = insertelement <8 x i1> %5, i1 %y, i32 5
+  %7 = insertelement <8 x i1> %6, i1 %y, i32 6
+  %8 = insertelement <8 x i1> %7, i1 %y, i32 7
+  ret <8 x i1> %8
+}
+
 define <10 x i1> @buildvec_mask_v10i1() {
 ; CHECK-LABEL: buildvec_mask_v10i1:
 ; CHECK:       # %bb.0:
@@ -518,13 +761,13 @@
 ;
 ; RV32-LMULMAX4-LABEL: buildvec_mask_optsize_v128i1:
 ; RV32-LMULMAX4:       # %bb.0:
-; RV32-LMULMAX4-NEXT:    lui a0, %hi(.LCPI10_0)
-; RV32-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI10_0)
+; RV32-LMULMAX4-NEXT:    lui a0, %hi(.LCPI21_0)
+; RV32-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI21_0)
 ; RV32-LMULMAX4-NEXT:    addi a1, zero, 64
 ; RV32-LMULMAX4-NEXT:    vsetvli a1, a1, e8,m4,ta,mu
 ; RV32-LMULMAX4-NEXT:    vle1.v v0, (a0)
-; RV32-LMULMAX4-NEXT:    lui a0, %hi(.LCPI10_1)
-; RV32-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI10_1)
+; RV32-LMULMAX4-NEXT:    lui a0, %hi(.LCPI21_1)
+; RV32-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI21_1)
 ; RV32-LMULMAX4-NEXT:    vle1.v v8, (a0)
 ; RV32-LMULMAX4-NEXT:    ret
 ;
@@ -551,8 +794,8 @@
 ;
 ; RV32-LMULMAX8-LABEL: buildvec_mask_optsize_v128i1:
 ; RV32-LMULMAX8:       # %bb.0:
-; RV32-LMULMAX8-NEXT:    lui a0, %hi(.LCPI10_0)
-; RV32-LMULMAX8-NEXT:    addi a0, a0, %lo(.LCPI10_0)
+; RV32-LMULMAX8-NEXT:    lui a0, %hi(.LCPI21_0)
+; RV32-LMULMAX8-NEXT:    addi a0, a0, %lo(.LCPI21_0)
 ; RV32-LMULMAX8-NEXT:    addi a1, zero, 128
 ; RV32-LMULMAX8-NEXT:    vsetvli a1, a1, e8,m8,ta,mu
 ; RV32-LMULMAX8-NEXT:    vle1.v v0, (a0)
@@ -560,8 +803,8 @@
 ;
 ; RV64-LMULMAX8-LABEL: buildvec_mask_optsize_v128i1:
 ; RV64-LMULMAX8:       # %bb.0:
-; RV64-LMULMAX8-NEXT:    lui a0, %hi(.LCPI10_0)
-; RV64-LMULMAX8-NEXT:    addi a0, a0, %lo(.LCPI10_0)
+; RV64-LMULMAX8-NEXT:    lui a0, %hi(.LCPI21_0)
+; RV64-LMULMAX8-NEXT:    addi a0, a0, %lo(.LCPI21_0)
 ; RV64-LMULMAX8-NEXT:    addi a1, zero, 128
 ; RV64-LMULMAX8-NEXT:    vsetvli a1, a1, e8,m8,ta,mu
 ; RV64-LMULMAX8-NEXT:    vle1.v v0, (a0)
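
Note: as a rough illustration of what the new lowering does for a non-splat i1 BUILD_VECTOR, the sketch below models the widen-to-i8 / AND-with-1 / compare-not-equal-zero sequence in plain C++, with scalar loops standing in for the vand.vi and vmsne.vi in the checks above. It is only a semantic model, not the SelectionDAG code from this patch; the function and variable names are invented for the example.

// Standalone model of the mask BUILD_VECTOR lowering's semantics: each i1
// element is widened to an i8 lane, only bit 0 of each lane is kept, and the
// mask is recovered by comparing every lane against zero.
#include <cstdint>
#include <iostream>
#include <vector>

std::vector<bool> lowerMaskBuildVector(const std::vector<uint8_t> &Elts) {
  std::vector<uint8_t> Wide(Elts.begin(), Elts.end()); // widen i1 -> i8 lanes
  for (uint8_t &Lane : Wide)
    Lane &= 1;                                         // like vand.vi v, v, 1
  std::vector<bool> Mask(Wide.size());
  for (size_t I = 0; I < Wide.size(); ++I)
    Mask[I] = Wide[I] != 0;                            // like vmsne.vi v0, v, 0
  return Mask;
}

int main() {
  // Elements where only bit 0 is significant, mirroring the XLenVT scalars
  // that feed the BUILD_VECTOR.
  std::vector<uint8_t> In = {0, 3, 1, 2};
  for (bool B : lowerMaskBuildVector(In))
    std::cout << B << ' ';
  std::cout << '\n'; // prints: 0 1 1 0
}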