diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -187,37 +187,12 @@
   return false;
 }
 
-static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL,
-                                         const MVT VT, int64_t Imm,
-                                         const RISCVSubtarget &Subtarget) {
-  assert(VT == MVT::i64 && "Expecting MVT::i64");
-  const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
-  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool(
-      ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT));
-  SDValue Addr = TLI->getAddr(CP, *CurDAG);
-  SDValue Offset = CurDAG->getTargetConstant(0, DL, VT);
-  // Since there is no data race, the chain can be the entry node.
-  SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset,
-                                        CurDAG->getEntryNode());
-  MachineFunction &MF = CurDAG->getMachineFunction();
-  MachineMemOperand *MemOp = MF.getMachineMemOperand(
-      MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
-      LLT(VT), CP->getAlign());
-  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp});
-  return Load;
-}
-
 static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                          int64_t Imm, const RISCVSubtarget &Subtarget) {
   MVT XLenVT = Subtarget.getXLenVT();
   RISCVMatInt::InstSeq Seq =
       RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
 
-  // If Imm is expensive to build, then we put it into constant pool.
-  if (Subtarget.useConstantPoolForLargeInts() &&
-      Seq.size() > Subtarget.getMaxBuildIntsCost())
-    return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget);
-
   SDNode *Result = nullptr;
   SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
   for (RISCVMatInt::Inst &Inst : Seq) {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -388,6 +388,9 @@
 
   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
 
+  if (Subtarget.is64Bit())
+    setOperationAction(ISD::Constant, MVT::i64, Custom);
+
   // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
   // Unfortunately this can't be determined just from the ISA naming string.
   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
@@ -2956,6 +2959,29 @@
                       Store->getMemOperand()->getFlags());
 }
 
+static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
+                             const RISCVSubtarget &Subtarget) {
+  assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
+
+  // Only do this if some constants should use the constant pool.
+  if (!Subtarget.useConstantPoolForLargeInts())
+    return Op;
+
+  int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
+
+  // All simm32 constants should be handled by isel.
+  if (isInt<32>(Imm))
+    return Op;
+
+  RISCVMatInt::InstSeq Seq =
+      RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
+  if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
+    return Op;
+
+  // Expand to a constant pool using the default expansion code.
+  return SDValue();
+}
+
 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
@@ -2971,6 +2997,8 @@
     return lowerJumpTable(Op, DAG);
   case ISD::GlobalTLSAddress:
     return lowerGlobalTLSAddress(Op, DAG);
+  case ISD::Constant:
+    return lowerConstant(Op, DAG, Subtarget);
   case ISD::SELECT:
     return lowerSELECT(Op, DAG);
   case ISD::BRCOND:
diff --git a/llvm/test/CodeGen/RISCV/rv64zbp-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zbp-intrinsic.ll
--- a/llvm/test/CodeGen/RISCV/rv64zbp-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbp-intrinsic.ll
@@ -530,8 +530,11 @@
 ; RV64ZBP:       # %bb.0:
 ; RV64ZBP-NEXT:    lui a1, %hi(.LCPI54_0)
 ; RV64ZBP-NEXT:    ld a1, %lo(.LCPI54_0)(a1)
+; RV64ZBP-NEXT:    lui a2, %hi(.LCPI54_1)
+; RV64ZBP-NEXT:    ld a2, %lo(.LCPI54_1)(a2)
 ; RV64ZBP-NEXT:    or a0, a0, a1
 ; RV64ZBP-NEXT:    orc32 a0, a0
+; RV64ZBP-NEXT:    or a0, a0, a2
 ; RV64ZBP-NEXT:    ret
   %tmp = or i64 %a, 72624976668147840 ; 0x102040810204080
   %tmp2 = call i64 @llvm.riscv.gorc.i64(i64 %tmp, i64 32)
diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll
--- a/llvm/test/CodeGen/RISCV/rv64zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll
@@ -1007,19 +1007,20 @@
 ;
 ; RV64ZBP-LABEL: gorc2b_i64:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    srli a1, a0, 2
-; RV64ZBP-NEXT:    or a1, a1, a0
-; RV64ZBP-NEXT:    orc2.n a0, a0
-; RV64ZBP-NEXT:    lui a2, %hi(.LCPI26_0)
-; RV64ZBP-NEXT:    ld a2, %lo(.LCPI26_0)(a2)
+; RV64ZBP-NEXT:    lui a1, %hi(.LCPI26_0)
+; RV64ZBP-NEXT:    ld a1, %lo(.LCPI26_0)(a1)
+; RV64ZBP-NEXT:    srli a2, a0, 2
+; RV64ZBP-NEXT:    and a2, a2, a1
 ; RV64ZBP-NEXT:    lui a3, %hi(.LCPI26_1)
 ; RV64ZBP-NEXT:    ld a3, %lo(.LCPI26_1)(a3)
-; RV64ZBP-NEXT:    slli a1, a1, 2
-; RV64ZBP-NEXT:    and a1, a1, a2
-; RV64ZBP-NEXT:    srli a2, a0, 2
+; RV64ZBP-NEXT:    or a2, a2, a0
+; RV64ZBP-NEXT:    orc2.n a0, a0
+; RV64ZBP-NEXT:    slli a2, a2, 2
 ; RV64ZBP-NEXT:    and a2, a2, a3
-; RV64ZBP-NEXT:    or a0, a2, a0
-; RV64ZBP-NEXT:    or a0, a0, a1
+; RV64ZBP-NEXT:    srli a3, a0, 2
+; RV64ZBP-NEXT:    and a1, a3, a1
+; RV64ZBP-NEXT:    or a0, a1, a0
+; RV64ZBP-NEXT:    or a0, a0, a2
 ; RV64ZBP-NEXT:    ret
   %and1 = shl i64 %a, 2
   %shl1 = and i64 %and1, -3689348814741910324
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -459,9 +459,9 @@
 ; RV64-LABEL: buildvec_seq_v16i8_v2i64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    lui a1, %hi(.LCPI24_0)
-; RV64-NEXT:    ld a1, %lo(.LCPI24_0)(a1)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI24_0)
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    vlse64.v v8, (a1), zero
 ; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
 ; RV64-NEXT:    vse8.v v8, (a0)
 ; RV64-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1034,15 +1034,15 @@
 ;
 ; RV64-LABEL: mulhu_v2i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a1, %hi(.LCPI55_0)
-; RV64-NEXT:    ld a1, %lo(.LCPI55_0)(a1)
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; RV64-NEXT:    lui a2, %hi(.LCPI55_1)
-; RV64-NEXT:    ld a2, %lo(.LCPI55_1)(a2)
-; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    lui a1, %hi(.LCPI55_0)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI55_0)
+; RV64-NEXT:    vlse64.v v8, (a1), zero
+; RV64-NEXT:    lui a1, %hi(.LCPI55_1)
+; RV64-NEXT:    ld a1, %lo(.LCPI55_1)(a1)
 ; RV64-NEXT:    vle64.v v9, (a0)
 ; RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
-; RV64-NEXT:    vmv.s.x v8, a2
+; RV64-NEXT:    vmv.s.x v8, a1
 ; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-NEXT:    vmulhu.vv v8, v9, v8
 ; RV64-NEXT:    vid.v v9
@@ -1174,12 +1174,12 @@
 ;
 ; RV64-LABEL: mulhs_v4i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a1, %hi(.LCPI58_0)
-; RV64-NEXT:    ld a1, %lo(.LCPI58_0)(a1)
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; RV64-NEXT:    vle32.v v8, (a0)
+; RV64-NEXT:    lui a1, %hi(.LCPI58_0)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI58_0)
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; RV64-NEXT:    vmv.v.x v9, a1
+; RV64-NEXT:    vlse64.v v9, (a1), zero
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; RV64-NEXT:    vmulh.vv v8, v8, v9
 ; RV64-NEXT:    vsra.vi v8, v8, 1
@@ -1229,15 +1229,15 @@
 ;
 ; RV64-LABEL: mulhs_v2i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a1, %hi(.LCPI59_0)
-; RV64-NEXT:    ld a1, %lo(.LCPI59_0)(a1)
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; RV64-NEXT:    lui a2, %hi(.LCPI59_1)
-; RV64-NEXT:    ld a2, %lo(.LCPI59_1)(a2)
-; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    lui a1, %hi(.LCPI59_0)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI59_0)
+; RV64-NEXT:    vlse64.v v8, (a1), zero
+; RV64-NEXT:    lui a1, %hi(.LCPI59_1)
+; RV64-NEXT:    ld a1, %lo(.LCPI59_1)(a1)
 ; RV64-NEXT:    vle64.v v9, (a0)
 ; RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
-; RV64-NEXT:    vmv.s.x v8, a2
+; RV64-NEXT:    vmv.s.x v8, a1
 ; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64-NEXT:    vmulh.vv v8, v9, v8
 ; RV64-NEXT:    vid.v v10
@@ -4346,22 +4346,22 @@
 ; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
 ; LMULMAX1-RV64-NEXT:    vmv.s.x v10, a2
 ; LMULMAX1-RV64-NEXT:    lui a2, %hi(.LCPI132_0)
-; LMULMAX1-RV64-NEXT:    ld a2, %lo(.LCPI132_0)(a2)
-; LMULMAX1-RV64-NEXT:    lui a3, %hi(.LCPI132_1)
-; LMULMAX1-RV64-NEXT:    ld a3, %lo(.LCPI132_1)(a3)
+; LMULMAX1-RV64-NEXT:    addi a2, a2, %lo(.LCPI132_0)
 ; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; LMULMAX1-RV64-NEXT:    vmv.v.x v11, a2
+; LMULMAX1-RV64-NEXT:    vlse64.v v11, (a2), zero
+; LMULMAX1-RV64-NEXT:    lui a2, %hi(.LCPI132_1)
+; LMULMAX1-RV64-NEXT:    ld a2, %lo(.LCPI132_1)(a2)
 ; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
-; LMULMAX1-RV64-NEXT:    vmv.s.x v11, a3
+; LMULMAX1-RV64-NEXT:    vmv.s.x v11, a2
 ; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; LMULMAX1-RV64-NEXT:    vmulhu.vv v11, v9, v11
 ; LMULMAX1-RV64-NEXT:    vsub.vv v9, v9, v11
-; LMULMAX1-RV64-NEXT:    lui a2, %hi(.LCPI132_2)
-; LMULMAX1-RV64-NEXT:    ld a2, %lo(.LCPI132_2)(a2)
 ; LMULMAX1-RV64-NEXT:    vmulhu.vv v9, v9, v10
 ; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v11
 ; LMULMAX1-RV64-NEXT:    vid.v v10
-; LMULMAX1-RV64-NEXT:    vmv.v.x v11, a2
+; LMULMAX1-RV64-NEXT:    lui a2, %hi(.LCPI132_2)
+; LMULMAX1-RV64-NEXT:    addi a2, a2, %lo(.LCPI132_2)
+; LMULMAX1-RV64-NEXT:    vlse64.v v11, (a2), zero
 ; LMULMAX1-RV64-NEXT:    lui a2, %hi(.LCPI132_3)
 ; LMULMAX1-RV64-NEXT:    ld a2, %lo(.LCPI132_3)(a2)
 ; LMULMAX1-RV64-NEXT:    vadd.vi v12, v10, 2
@@ -4555,12 +4555,12 @@
 ;
 ; LMULMAX2-RV64-LABEL: mulhs_v8i32:
 ; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    lui a1, %hi(.LCPI135_0)
-; LMULMAX2-RV64-NEXT:    ld a1, %lo(.LCPI135_0)(a1)
 ; LMULMAX2-RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; LMULMAX2-RV64-NEXT:    vle32.v v8, (a0)
+; LMULMAX2-RV64-NEXT:    lui a1, %hi(.LCPI135_0)
+; LMULMAX2-RV64-NEXT:    addi a1, a1, %lo(.LCPI135_0)
 ; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; LMULMAX2-RV64-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV64-NEXT:    vlse64.v v10, (a1), zero
 ; LMULMAX2-RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; LMULMAX2-RV64-NEXT:    vmulh.vv v8, v8, v10
 ; LMULMAX2-RV64-NEXT:    vsra.vi v8, v8, 1
@@ -4665,15 +4665,15 @@
 ; LMULMAX2-RV64-NEXT:    li a1, 5
 ; LMULMAX2-RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; LMULMAX2-RV64-NEXT:    vmv.s.x v0, a1
-; LMULMAX2-RV64-NEXT:    lui a1, %hi(.LCPI136_0)
-; LMULMAX2-RV64-NEXT:    ld a1, %lo(.LCPI136_0)(a1)
 ; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; LMULMAX2-RV64-NEXT:    lui a2, %hi(.LCPI136_1)
-; LMULMAX2-RV64-NEXT:    ld a2, %lo(.LCPI136_1)(a2)
-; LMULMAX2-RV64-NEXT:    vmv.v.x v10, a1
+; LMULMAX2-RV64-NEXT:    lui a1, %hi(.LCPI136_0)
+; LMULMAX2-RV64-NEXT:    addi a1, a1, %lo(.LCPI136_0)
+; LMULMAX2-RV64-NEXT:    vlse64.v v10, (a1), zero
+; LMULMAX2-RV64-NEXT:    lui a1, %hi(.LCPI136_1)
+; LMULMAX2-RV64-NEXT:    ld a1, %lo(.LCPI136_1)(a1)
 ; LMULMAX2-RV64-NEXT:    vmv.v.i v12, -1
 ; LMULMAX2-RV64-NEXT:    vmerge.vim v12, v12, 0, v0
-; LMULMAX2-RV64-NEXT:    vmerge.vxm v10, v10, a2, v0
+; LMULMAX2-RV64-NEXT:    vmerge.vxm v10, v10, a1, v0
 ; LMULMAX2-RV64-NEXT:    vmulh.vv v10, v8, v10
 ; LMULMAX2-RV64-NEXT:    vmacc.vv v10, v8, v12
 ; LMULMAX2-RV64-NEXT:    li a1, 63
@@ -4704,11 +4704,11 @@
 ;
 ; LMULMAX1-RV64-LABEL: mulhs_v4i64:
 ; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    lui a1, %hi(.LCPI136_0)
-; LMULMAX1-RV64-NEXT:    ld a1, %lo(.LCPI136_0)(a1)
 ; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
 ; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vmv.v.x v9, a1
+; LMULMAX1-RV64-NEXT:    lui a1, %hi(.LCPI136_0)
+; LMULMAX1-RV64-NEXT:    addi a1, a1, %lo(.LCPI136_0)
+; LMULMAX1-RV64-NEXT:    vlse64.v v9, (a1), zero
 ; LMULMAX1-RV64-NEXT:    lui a1, %hi(.LCPI136_1)
 ; LMULMAX1-RV64-NEXT:    ld a1, %lo(.LCPI136_1)(a1)
 ; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -674,9 +674,9 @@
 ; RV64-LMULMAX4-LABEL: buildvec_mask_v64i1:
 ; RV64-LMULMAX4:       # %bb.0:
 ; RV64-LMULMAX4-NEXT:    lui a0, %hi(.LCPI19_0)
-; RV64-LMULMAX4-NEXT:    ld a0, %lo(.LCPI19_0)(a0)
+; RV64-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI19_0)
 ; RV64-LMULMAX4-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV64-LMULMAX4-NEXT:    vmv.s.x v0, a0
+; RV64-LMULMAX4-NEXT:    vlse64.v v0, (a0), zero
 ; RV64-LMULMAX4-NEXT:    ret
 ;
 ; RV32-LMULMAX8-LABEL: buildvec_mask_v64i1:
@@ -695,9 +695,9 @@
 ; RV64-LMULMAX8-LABEL: buildvec_mask_v64i1:
 ; RV64-LMULMAX8:       # %bb.0:
 ; RV64-LMULMAX8-NEXT:    lui a0, %hi(.LCPI19_0)
-; RV64-LMULMAX8-NEXT:    ld a0, %lo(.LCPI19_0)(a0)
+; RV64-LMULMAX8-NEXT:    addi a0, a0, %lo(.LCPI19_0)
 ; RV64-LMULMAX8-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV64-LMULMAX8-NEXT:    vmv.s.x v0, a0
+; RV64-LMULMAX8-NEXT:    vlse64.v v0, (a0), zero
 ; RV64-LMULMAX8-NEXT:    ret
   ret <64 x i1>
 }
@@ -806,12 +806,12 @@
 ; RV64-LMULMAX4-LABEL: buildvec_mask_v128i1:
 ; RV64-LMULMAX4:       # %bb.0:
 ; RV64-LMULMAX4-NEXT:    lui a0, %hi(.LCPI20_0)
-; RV64-LMULMAX4-NEXT:    ld a0, %lo(.LCPI20_0)(a0)
-; RV64-LMULMAX4-NEXT:    lui a1, %hi(.LCPI20_1)
-; RV64-LMULMAX4-NEXT:    ld a1, %lo(.LCPI20_1)(a1)
+; RV64-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI20_0)
 ; RV64-LMULMAX4-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV64-LMULMAX4-NEXT:    vmv.s.x v0, a0
-; RV64-LMULMAX4-NEXT:    vmv.s.x v8, a1
+; RV64-LMULMAX4-NEXT:    vlse64.v v0, (a0), zero
+; RV64-LMULMAX4-NEXT:    lui a0, %hi(.LCPI20_1)
+; RV64-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI20_1)
+; RV64-LMULMAX4-NEXT:    vlse64.v v8, (a0), zero
 ; RV64-LMULMAX4-NEXT:    ret
 ;
 ; RV32-LMULMAX8-LABEL: buildvec_mask_v128i1:
@@ -948,12 +948,12 @@
 ; RV64-LMULMAX4-LABEL: buildvec_mask_optsize_v128i1:
 ; RV64-LMULMAX4:       # %bb.0:
 ; RV64-LMULMAX4-NEXT:    lui a0, %hi(.LCPI21_0)
-; RV64-LMULMAX4-NEXT:    ld a0, %lo(.LCPI21_0)(a0)
-; RV64-LMULMAX4-NEXT:    lui a1, %hi(.LCPI21_1)
-; RV64-LMULMAX4-NEXT:    ld a1, %lo(.LCPI21_1)(a1)
+; RV64-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI21_0)
 ; RV64-LMULMAX4-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV64-LMULMAX4-NEXT:    vmv.s.x v0, a0
-; RV64-LMULMAX4-NEXT:    vmv.s.x v8, a1
+; RV64-LMULMAX4-NEXT:    vlse64.v v0, (a0), zero
+; RV64-LMULMAX4-NEXT:    lui a0, %hi(.LCPI21_1)
+; RV64-LMULMAX4-NEXT:    addi a0, a0, %lo(.LCPI21_1)
+; RV64-LMULMAX4-NEXT:    vlse64.v v8, (a0), zero
 ; RV64-LMULMAX4-NEXT:    ret
 ;
 ; RV32-LMULMAX8-LABEL: buildvec_mask_optsize_v128i1: