diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -206,6 +206,29 @@ RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); + // See if we can create this constant as (ADD (SLLI X, 32), X) where X is at + // worst an LUI+ADDIW. This will require an extra register, but avoids a + // constant pool. + if (Seq.size() > 3) { + int64_t LoVal = SignExtend64<32>(Imm); + int64_t HiVal = SignExtend64<32>((Imm - LoVal) >> 32); + if (LoVal == HiVal) { + RISCVMatInt::InstSeq SeqLo = + RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits()); + if ((SeqLo.size() + 2) < Seq.size()) { + SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo); + + SDValue SLLI = SDValue( + CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo, + CurDAG->getTargetConstant(32, DL, VT)), + 0); + return SDValue(CurDAG->getMachineNode(RISCV::ADD, DL, VT, Lo, SLLI), + 0); + } + } + } + + // Otherwise, use the original sequence. return selectImmSeq(CurDAG, DL, VT, Seq); } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4400,6 +4400,18 @@ if (Seq.size() <= Subtarget.getMaxBuildIntsCost()) return Op; + // Special case: see if we can build the constant as (ADD (SLLI X, 32), X); + // do that if it will avoid a constant pool. + // It will require an extra temporary register, though. + int64_t LoVal = SignExtend64<32>(Imm); + int64_t HiVal = SignExtend64<32>((Imm - LoVal) >> 32); + if (LoVal == HiVal) { + RISCVMatInt::InstSeq SeqLo = + RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits()); + if ((SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost()) + return Op; + } + + // Expand to a constant pool using the default expansion code. 
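+ // For a 64-bit constant whose two 32-bit halves match, e.g. + // 0x0f0f0f0f0f0f0f0f, the check above returns Op and the constant is later + // selected as lui+addiw+slli+add (see the updated RV64 tests below); + // constants whose halves differ still fall through to the constant pool.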
return SDValue(); } diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll --- a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll @@ -620,25 +620,31 @@ ; RV64I-NEXT: slli a2, a2, 40 ; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: lui a2, %hi(.LCPI6_0) -; RV64I-NEXT: ld a2, %lo(.LCPI6_0)(a2) ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: slli a3, a2, 32 +; RV64I-NEXT: add a2, a2, a3 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, %hi(.LCPI6_1) -; RV64I-NEXT: ld a2, %lo(.LCPI6_1)(a2) ; RV64I-NEXT: slli a0, a0, 4 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: slli a3, a2, 32 +; RV64I-NEXT: add a2, a2, a3 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, %hi(.LCPI6_2) -; RV64I-NEXT: ld a2, %lo(.LCPI6_2)(a2) ; RV64I-NEXT: slli a0, a0, 2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a3, a2, 32 +; RV64I-NEXT: add a2, a2, a3 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: slli a0, a0, 1 @@ -690,28 +696,34 @@ ; ; RV64ZBB-LABEL: test_bitreverse_i64: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: lui a1, %hi(.LCPI6_0) -; RV64ZBB-NEXT: ld a1, %lo(.LCPI6_0)(a1) ; RV64ZBB-NEXT: rev8 a0, a0 -; RV64ZBB-NEXT: srli a2, a0, 4 -; RV64ZBB-NEXT: and a2, a2, a1 -; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: lui a1, %hi(.LCPI6_1) -; RV64ZBB-NEXT: ld a1, %lo(.LCPI6_1)(a1) +; RV64ZBB-NEXT: srli a1, a0, 4 +; RV64ZBB-NEXT: lui a2, 61681 +; RV64ZBB-NEXT: addiw a2, a2, -241 +; RV64ZBB-NEXT: slli a3, a2, 32 +; RV64ZBB-NEXT: add a2, a2, a3 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 ; RV64ZBB-NEXT: slli a0, a0, 4 -; RV64ZBB-NEXT: or a0, a2, a0 -; RV64ZBB-NEXT: srli a2, a0, 2 -; RV64ZBB-NEXT: and a2, a2, a1 -; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: lui a1, %hi(.LCPI6_2) -; RV64ZBB-NEXT: ld a1, %lo(.LCPI6_2)(a1) +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 2 +; RV64ZBB-NEXT: lui a2, 209715 +; RV64ZBB-NEXT: addiw a2, a2, 819 +; RV64ZBB-NEXT: slli a3, a2, 32 +; RV64ZBB-NEXT: add a2, a2, a3 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 ; RV64ZBB-NEXT: slli a0, a0, 2 -; RV64ZBB-NEXT: or a0, a2, a0 -; RV64ZBB-NEXT: srli a2, a0, 1 -; RV64ZBB-NEXT: and a2, a2, a1 -; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 1 +; RV64ZBB-NEXT: lui a2, 349525 +; RV64ZBB-NEXT: addiw a2, a2, 1365 +; RV64ZBB-NEXT: slli a3, a2, 32 +; RV64ZBB-NEXT: add a2, a2, a3 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 ; RV64ZBB-NEXT: slli a0, a0, 1 -; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: or a0, a1, a0 ; RV64ZBB-NEXT: ret ; ; RV32ZBKB-LABEL: test_bitreverse_i64: @@ -1005,27 +1017,33 @@ ; ; RV64I-LABEL: test_bswap_bitreverse_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, %hi(.LCPI9_0) -; RV64I-NEXT: ld a1, %lo(.LCPI9_0)(a1) -; RV64I-NEXT: srli a2, a0, 4 -; RV64I-NEXT: and a2, a2, a1 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI9_1) -; RV64I-NEXT: ld a1, %lo(.LCPI9_1)(a1) +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: slli a3, a2, 32 +; RV64I-NEXT: add a2, a2, a3 +; 
RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: slli a0, a0, 4 -; RV64I-NEXT: or a0, a2, a0 -; RV64I-NEXT: srli a2, a0, 2 -; RV64I-NEXT: and a2, a2, a1 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI9_2) -; RV64I-NEXT: ld a1, %lo(.LCPI9_2)(a1) +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: slli a3, a2, 32 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: slli a0, a0, 2 -; RV64I-NEXT: or a0, a2, a0 -; RV64I-NEXT: srli a2, a0, 1 -; RV64I-NEXT: and a2, a2, a1 -; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a3, a2, 32 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: slli a0, a0, 1 -; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: test_bswap_bitreverse_i64: @@ -1070,27 +1088,33 @@ ; ; RV64ZBB-LABEL: test_bswap_bitreverse_i64: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: lui a1, %hi(.LCPI9_0) -; RV64ZBB-NEXT: ld a1, %lo(.LCPI9_0)(a1) -; RV64ZBB-NEXT: srli a2, a0, 4 -; RV64ZBB-NEXT: and a2, a2, a1 -; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: lui a1, %hi(.LCPI9_1) -; RV64ZBB-NEXT: ld a1, %lo(.LCPI9_1)(a1) +; RV64ZBB-NEXT: srli a1, a0, 4 +; RV64ZBB-NEXT: lui a2, 61681 +; RV64ZBB-NEXT: addiw a2, a2, -241 +; RV64ZBB-NEXT: slli a3, a2, 32 +; RV64ZBB-NEXT: add a2, a2, a3 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 ; RV64ZBB-NEXT: slli a0, a0, 4 -; RV64ZBB-NEXT: or a0, a2, a0 -; RV64ZBB-NEXT: srli a2, a0, 2 -; RV64ZBB-NEXT: and a2, a2, a1 -; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: lui a1, %hi(.LCPI9_2) -; RV64ZBB-NEXT: ld a1, %lo(.LCPI9_2)(a1) +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 2 +; RV64ZBB-NEXT: lui a2, 209715 +; RV64ZBB-NEXT: addiw a2, a2, 819 +; RV64ZBB-NEXT: slli a3, a2, 32 +; RV64ZBB-NEXT: add a2, a2, a3 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 ; RV64ZBB-NEXT: slli a0, a0, 2 -; RV64ZBB-NEXT: or a0, a2, a0 -; RV64ZBB-NEXT: srli a2, a0, 1 -; RV64ZBB-NEXT: and a2, a2, a1 -; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 1 +; RV64ZBB-NEXT: lui a2, 349525 +; RV64ZBB-NEXT: addiw a2, a2, 1365 +; RV64ZBB-NEXT: slli a3, a2, 32 +; RV64ZBB-NEXT: add a2, a2, a3 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 ; RV64ZBB-NEXT: slli a0, a0, 1 -; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: or a0, a1, a0 ; RV64ZBB-NEXT: ret ; ; RV32ZBKB-LABEL: test_bswap_bitreverse_i64: @@ -1381,27 +1405,33 @@ ; ; RV64I-LABEL: test_bitreverse_bswap_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, %hi(.LCPI12_0) -; RV64I-NEXT: ld a1, %lo(.LCPI12_0)(a1) -; RV64I-NEXT: srli a2, a0, 4 -; RV64I-NEXT: and a2, a2, a1 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI12_1) -; RV64I-NEXT: ld a1, %lo(.LCPI12_1)(a1) +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: slli a3, a2, 32 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: slli a0, a0, 4 -; RV64I-NEXT: or a0, a2, a0 -; RV64I-NEXT: srli a2, a0, 2 -; RV64I-NEXT: and a2, a2, a1 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI12_2) -; RV64I-NEXT: ld a1, %lo(.LCPI12_2)(a1) +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 2 
+; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: slli a3, a2, 32 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: slli a0, a0, 2 -; RV64I-NEXT: or a0, a2, a0 -; RV64I-NEXT: srli a2, a0, 1 -; RV64I-NEXT: and a2, a2, a1 -; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a3, a2, 32 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: slli a0, a0, 1 -; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: test_bitreverse_bswap_i64: @@ -1446,27 +1476,33 @@ ; ; RV64ZBB-LABEL: test_bitreverse_bswap_i64: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: lui a1, %hi(.LCPI12_0) -; RV64ZBB-NEXT: ld a1, %lo(.LCPI12_0)(a1) -; RV64ZBB-NEXT: srli a2, a0, 4 -; RV64ZBB-NEXT: and a2, a2, a1 -; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: lui a1, %hi(.LCPI12_1) -; RV64ZBB-NEXT: ld a1, %lo(.LCPI12_1)(a1) +; RV64ZBB-NEXT: srli a1, a0, 4 +; RV64ZBB-NEXT: lui a2, 61681 +; RV64ZBB-NEXT: addiw a2, a2, -241 +; RV64ZBB-NEXT: slli a3, a2, 32 +; RV64ZBB-NEXT: add a2, a2, a3 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 ; RV64ZBB-NEXT: slli a0, a0, 4 -; RV64ZBB-NEXT: or a0, a2, a0 -; RV64ZBB-NEXT: srli a2, a0, 2 -; RV64ZBB-NEXT: and a2, a2, a1 -; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: lui a1, %hi(.LCPI12_2) -; RV64ZBB-NEXT: ld a1, %lo(.LCPI12_2)(a1) +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 2 +; RV64ZBB-NEXT: lui a2, 209715 +; RV64ZBB-NEXT: addiw a2, a2, 819 +; RV64ZBB-NEXT: slli a3, a2, 32 +; RV64ZBB-NEXT: add a2, a2, a3 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 ; RV64ZBB-NEXT: slli a0, a0, 2 -; RV64ZBB-NEXT: or a0, a2, a0 -; RV64ZBB-NEXT: srli a2, a0, 1 -; RV64ZBB-NEXT: and a2, a2, a1 -; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 1 +; RV64ZBB-NEXT: lui a2, 349525 +; RV64ZBB-NEXT: addiw a2, a2, 1365 +; RV64ZBB-NEXT: slli a3, a2, 32 +; RV64ZBB-NEXT: add a2, a2, a3 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 ; RV64ZBB-NEXT: slli a0, a0, 1 -; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: or a0, a1, a0 ; RV64ZBB-NEXT: ret ; ; RV32ZBKB-LABEL: test_bitreverse_bswap_i64: diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll --- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll @@ -1459,24 +1459,32 @@ ; RV64I-NEXT: srli a1, a0, 32 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: lui a1, %hi(.LCPI11_0) -; RV64I-NEXT: ld a1, %lo(.LCPI11_0)(a1) -; RV64I-NEXT: lui a2, %hi(.LCPI11_1) -; RV64I-NEXT: ld a2, %lo(.LCPI11_1)(a2) -; RV64I-NEXT: srli a3, a0, 1 -; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a3, a2, 32 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: and a1, a0, a2 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: and a2, a0, a1 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, %hi(.LCPI11_2) -; RV64I-NEXT: ld a2, %lo(.LCPI11_2)(a2) -; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 ; RV64I-NEXT: srli 
a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a1, %hi(.LCPI11_3) -; RV64I-NEXT: ld a1, %lo(.LCPI11_3)(a1) +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -1568,25 +1576,33 @@ ; RV64M-NEXT: srli a1, a0, 32 ; RV64M-NEXT: or a0, a0, a1 ; RV64M-NEXT: not a0, a0 -; RV64M-NEXT: lui a1, %hi(.LCPI11_0) -; RV64M-NEXT: ld a1, %lo(.LCPI11_0)(a1) -; RV64M-NEXT: lui a2, %hi(.LCPI11_1) -; RV64M-NEXT: ld a2, %lo(.LCPI11_1)(a2) -; RV64M-NEXT: srli a3, a0, 1 -; RV64M-NEXT: and a1, a3, a1 +; RV64M-NEXT: srli a1, a0, 1 +; RV64M-NEXT: lui a2, 349525 +; RV64M-NEXT: addiw a2, a2, 1365 +; RV64M-NEXT: slli a3, a2, 32 +; RV64M-NEXT: add a2, a2, a3 +; RV64M-NEXT: and a1, a1, a2 ; RV64M-NEXT: sub a0, a0, a1 -; RV64M-NEXT: and a1, a0, a2 +; RV64M-NEXT: lui a1, 209715 +; RV64M-NEXT: addiw a1, a1, 819 +; RV64M-NEXT: slli a2, a1, 32 +; RV64M-NEXT: add a1, a1, a2 +; RV64M-NEXT: and a2, a0, a1 ; RV64M-NEXT: srli a0, a0, 2 -; RV64M-NEXT: and a0, a0, a2 -; RV64M-NEXT: add a0, a1, a0 -; RV64M-NEXT: lui a1, %hi(.LCPI11_2) -; RV64M-NEXT: ld a1, %lo(.LCPI11_2)(a1) -; RV64M-NEXT: lui a2, %hi(.LCPI11_3) -; RV64M-NEXT: ld a2, %lo(.LCPI11_3)(a2) -; RV64M-NEXT: srli a3, a0, 4 -; RV64M-NEXT: add a0, a0, a3 ; RV64M-NEXT: and a0, a0, a1 -; RV64M-NEXT: mul a0, a0, a2 +; RV64M-NEXT: add a0, a2, a0 +; RV64M-NEXT: srli a1, a0, 4 +; RV64M-NEXT: add a0, a0, a1 +; RV64M-NEXT: lui a1, 61681 +; RV64M-NEXT: addiw a1, a1, -241 +; RV64M-NEXT: slli a2, a1, 32 +; RV64M-NEXT: add a1, a1, a2 +; RV64M-NEXT: and a0, a0, a1 +; RV64M-NEXT: lui a1, 4112 +; RV64M-NEXT: addiw a1, a1, 257 +; RV64M-NEXT: slli a2, a1, 32 +; RV64M-NEXT: add a1, a1, a2 +; RV64M-NEXT: mul a0, a0, a1 ; RV64M-NEXT: srli a0, a0, 56 ; RV64M-NEXT: ret ; RV64M-NEXT: .LBB11_2: @@ -2092,24 +2108,32 @@ ; RV64I-NEXT: srli a1, a0, 32 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: lui a1, %hi(.LCPI15_0) -; RV64I-NEXT: ld a1, %lo(.LCPI15_0)(a1) -; RV64I-NEXT: lui a2, %hi(.LCPI15_1) -; RV64I-NEXT: ld a2, %lo(.LCPI15_1)(a2) -; RV64I-NEXT: srli a3, a0, 1 -; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a3, a2, 32 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: and a1, a0, a2 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: and a2, a0, a1 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, %hi(.LCPI15_2) -; RV64I-NEXT: ld a2, %lo(.LCPI15_2)(a2) -; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a1, %hi(.LCPI15_3) -; RV64I-NEXT: ld a1, %lo(.LCPI15_3)(a1) +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; 
RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -2196,25 +2220,33 @@ ; RV64M-NEXT: srli a1, a0, 32 ; RV64M-NEXT: or a0, a0, a1 ; RV64M-NEXT: not a0, a0 -; RV64M-NEXT: lui a1, %hi(.LCPI15_0) -; RV64M-NEXT: ld a1, %lo(.LCPI15_0)(a1) -; RV64M-NEXT: lui a2, %hi(.LCPI15_1) -; RV64M-NEXT: ld a2, %lo(.LCPI15_1)(a2) -; RV64M-NEXT: srli a3, a0, 1 -; RV64M-NEXT: and a1, a3, a1 +; RV64M-NEXT: srli a1, a0, 1 +; RV64M-NEXT: lui a2, 349525 +; RV64M-NEXT: addiw a2, a2, 1365 +; RV64M-NEXT: slli a3, a2, 32 +; RV64M-NEXT: add a2, a2, a3 +; RV64M-NEXT: and a1, a1, a2 ; RV64M-NEXT: sub a0, a0, a1 -; RV64M-NEXT: and a1, a0, a2 +; RV64M-NEXT: lui a1, 209715 +; RV64M-NEXT: addiw a1, a1, 819 +; RV64M-NEXT: slli a2, a1, 32 +; RV64M-NEXT: add a1, a1, a2 +; RV64M-NEXT: and a2, a0, a1 ; RV64M-NEXT: srli a0, a0, 2 -; RV64M-NEXT: and a0, a0, a2 -; RV64M-NEXT: add a0, a1, a0 -; RV64M-NEXT: lui a1, %hi(.LCPI15_2) -; RV64M-NEXT: ld a1, %lo(.LCPI15_2)(a1) -; RV64M-NEXT: lui a2, %hi(.LCPI15_3) -; RV64M-NEXT: ld a2, %lo(.LCPI15_3)(a2) -; RV64M-NEXT: srli a3, a0, 4 -; RV64M-NEXT: add a0, a0, a3 ; RV64M-NEXT: and a0, a0, a1 -; RV64M-NEXT: mul a0, a0, a2 +; RV64M-NEXT: add a0, a2, a0 +; RV64M-NEXT: srli a1, a0, 4 +; RV64M-NEXT: add a0, a0, a1 +; RV64M-NEXT: lui a1, 61681 +; RV64M-NEXT: addiw a1, a1, -241 +; RV64M-NEXT: slli a2, a1, 32 +; RV64M-NEXT: add a1, a1, a2 +; RV64M-NEXT: and a0, a0, a1 +; RV64M-NEXT: lui a1, 4112 +; RV64M-NEXT: addiw a1, a1, 257 +; RV64M-NEXT: slli a2, a1, 32 +; RV64M-NEXT: add a1, a1, a2 +; RV64M-NEXT: mul a0, a0, a1 ; RV64M-NEXT: srli a0, a0, 56 ; RV64M-NEXT: ret ; @@ -2665,24 +2697,32 @@ ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, %hi(.LCPI19_0) -; RV64I-NEXT: ld a1, %lo(.LCPI19_0)(a1) -; RV64I-NEXT: lui a2, %hi(.LCPI19_1) -; RV64I-NEXT: ld a2, %lo(.LCPI19_1)(a2) -; RV64I-NEXT: srli a3, a0, 1 -; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a3, a2, 32 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: and a1, a0, a2 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: and a2, a0, a1 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, %hi(.LCPI19_2) -; RV64I-NEXT: ld a2, %lo(.LCPI19_2)(a2) -; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a1, %hi(.LCPI19_3) -; RV64I-NEXT: ld a1, %lo(.LCPI19_3)(a1) +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -2729,25 +2769,33 @@ ; ; RV64M-LABEL: test_ctpop_i64: ; RV64M: # %bb.0: -; RV64M-NEXT: lui a1, %hi(.LCPI19_0) -; RV64M-NEXT: ld a1, %lo(.LCPI19_0)(a1) -; RV64M-NEXT: lui a2, %hi(.LCPI19_1) -; RV64M-NEXT: ld a2, %lo(.LCPI19_1)(a2) -; RV64M-NEXT: srli a3, a0, 1 -; RV64M-NEXT: and a1, a3, a1 +; RV64M-NEXT: srli a1, a0, 1 +; RV64M-NEXT: lui a2, 349525 +; RV64M-NEXT: addiw a2, a2, 1365 +; RV64M-NEXT: slli a3, a2, 32 +; RV64M-NEXT: add a2, a2, a3 +; 
RV64M-NEXT: and a1, a1, a2 ; RV64M-NEXT: sub a0, a0, a1 -; RV64M-NEXT: and a1, a0, a2 +; RV64M-NEXT: lui a1, 209715 +; RV64M-NEXT: addiw a1, a1, 819 +; RV64M-NEXT: slli a2, a1, 32 +; RV64M-NEXT: add a1, a1, a2 +; RV64M-NEXT: and a2, a0, a1 ; RV64M-NEXT: srli a0, a0, 2 -; RV64M-NEXT: and a0, a0, a2 -; RV64M-NEXT: add a0, a1, a0 -; RV64M-NEXT: lui a1, %hi(.LCPI19_2) -; RV64M-NEXT: ld a1, %lo(.LCPI19_2)(a1) -; RV64M-NEXT: lui a2, %hi(.LCPI19_3) -; RV64M-NEXT: ld a2, %lo(.LCPI19_3)(a2) -; RV64M-NEXT: srli a3, a0, 4 -; RV64M-NEXT: add a0, a0, a3 ; RV64M-NEXT: and a0, a0, a1 -; RV64M-NEXT: mul a0, a0, a2 +; RV64M-NEXT: add a0, a2, a0 +; RV64M-NEXT: srli a1, a0, 4 +; RV64M-NEXT: add a0, a0, a1 +; RV64M-NEXT: lui a1, 61681 +; RV64M-NEXT: addiw a1, a1, -241 +; RV64M-NEXT: slli a2, a1, 32 +; RV64M-NEXT: add a1, a1, a2 +; RV64M-NEXT: and a0, a0, a1 +; RV64M-NEXT: lui a1, 4112 +; RV64M-NEXT: addiw a1, a1, 257 +; RV64M-NEXT: slli a2, a1, 32 +; RV64M-NEXT: add a1, a1, a2 +; RV64M-NEXT: mul a0, a0, a1 ; RV64M-NEXT: srli a0, a0, 56 ; RV64M-NEXT: ret ; @@ -2825,24 +2873,32 @@ ; RV64XTHEADBB: # %bb.0: ; RV64XTHEADBB-NEXT: addi sp, sp, -16 ; RV64XTHEADBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64XTHEADBB-NEXT: lui a1, %hi(.LCPI19_0) -; RV64XTHEADBB-NEXT: ld a1, %lo(.LCPI19_0)(a1) -; RV64XTHEADBB-NEXT: lui a2, %hi(.LCPI19_1) -; RV64XTHEADBB-NEXT: ld a2, %lo(.LCPI19_1)(a2) -; RV64XTHEADBB-NEXT: srli a3, a0, 1 -; RV64XTHEADBB-NEXT: and a1, a3, a1 +; RV64XTHEADBB-NEXT: srli a1, a0, 1 +; RV64XTHEADBB-NEXT: lui a2, 349525 +; RV64XTHEADBB-NEXT: addiw a2, a2, 1365 +; RV64XTHEADBB-NEXT: slli a3, a2, 32 +; RV64XTHEADBB-NEXT: add a2, a2, a3 +; RV64XTHEADBB-NEXT: and a1, a1, a2 ; RV64XTHEADBB-NEXT: sub a0, a0, a1 -; RV64XTHEADBB-NEXT: and a1, a0, a2 +; RV64XTHEADBB-NEXT: lui a1, 209715 +; RV64XTHEADBB-NEXT: addiw a1, a1, 819 +; RV64XTHEADBB-NEXT: slli a2, a1, 32 +; RV64XTHEADBB-NEXT: add a1, a1, a2 +; RV64XTHEADBB-NEXT: and a2, a0, a1 ; RV64XTHEADBB-NEXT: srli a0, a0, 2 -; RV64XTHEADBB-NEXT: and a0, a0, a2 -; RV64XTHEADBB-NEXT: lui a2, %hi(.LCPI19_2) -; RV64XTHEADBB-NEXT: ld a2, %lo(.LCPI19_2)(a2) -; RV64XTHEADBB-NEXT: add a0, a1, a0 +; RV64XTHEADBB-NEXT: and a0, a0, a1 +; RV64XTHEADBB-NEXT: add a0, a2, a0 ; RV64XTHEADBB-NEXT: srli a1, a0, 4 ; RV64XTHEADBB-NEXT: add a0, a0, a1 -; RV64XTHEADBB-NEXT: and a0, a0, a2 -; RV64XTHEADBB-NEXT: lui a1, %hi(.LCPI19_3) -; RV64XTHEADBB-NEXT: ld a1, %lo(.LCPI19_3)(a1) +; RV64XTHEADBB-NEXT: lui a1, 61681 +; RV64XTHEADBB-NEXT: addiw a1, a1, -241 +; RV64XTHEADBB-NEXT: slli a2, a1, 32 +; RV64XTHEADBB-NEXT: add a1, a1, a2 +; RV64XTHEADBB-NEXT: and a0, a0, a1 +; RV64XTHEADBB-NEXT: lui a1, 4112 +; RV64XTHEADBB-NEXT: addiw a1, a1, 257 +; RV64XTHEADBB-NEXT: slli a2, a1, 32 +; RV64XTHEADBB-NEXT: add a1, a1, a2 ; RV64XTHEADBB-NEXT: call __muldi3@plt ; RV64XTHEADBB-NEXT: srli a0, a0, 56 ; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll --- a/llvm/test/CodeGen/RISCV/div-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll @@ -103,8 +103,10 @@ ; ; RV64-LABEL: udiv64_constant_no_add: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI2_0) -; RV64-NEXT: ld a1, %lo(.LCPI2_0)(a1) +; RV64-NEXT: lui a1, 838861 +; RV64-NEXT: addiw a1, a1, -819 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 ; RV64-NEXT: mulhu a0, a0, a1 ; RV64-NEXT: srli a0, a0, 2 ; RV64-NEXT: ret @@ -437,8 +439,10 @@ ; ; RV64-LABEL: sdiv64_constant_add_srai: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, 
%hi(.LCPI14_0) -; RV64-NEXT: ld a1, %lo(.LCPI14_0)(a1) +; RV64-NEXT: lui a1, 559241 +; RV64-NEXT: addiw a1, a1, -1911 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 ; RV64-NEXT: mulh a1, a0, a1 ; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: srli a1, a0, 63 @@ -463,8 +467,10 @@ ; ; RV64-LABEL: sdiv64_constant_sub_srai: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI15_0) -; RV64-NEXT: ld a1, %lo(.LCPI15_0)(a1) +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 ; RV64-NEXT: mulh a1, a0, a1 ; RV64-NEXT: sub a1, a1, a0 ; RV64-NEXT: srli a0, a1, 63 diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll --- a/llvm/test/CodeGen/RISCV/div.ll +++ b/llvm/test/CodeGen/RISCV/div.ll @@ -210,8 +210,10 @@ ; ; RV64IM-LABEL: udiv64_constant: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, %hi(.LCPI5_0) -; RV64IM-NEXT: ld a1, %lo(.LCPI5_0)(a1) +; RV64IM-NEXT: lui a1, 838861 +; RV64IM-NEXT: addiw a1, a1, -819 +; RV64IM-NEXT: slli a2, a1, 32 +; RV64IM-NEXT: add a1, a1, a2 ; RV64IM-NEXT: mulhu a0, a0, a1 ; RV64IM-NEXT: srli a0, a0, 2 ; RV64IM-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/pr56457.ll b/llvm/test/CodeGen/RISCV/pr56457.ll --- a/llvm/test/CodeGen/RISCV/pr56457.ll +++ b/llvm/test/CodeGen/RISCV/pr56457.ll @@ -21,27 +21,33 @@ ; CHECK-NEXT: srli a1, a1, 57 ; CHECK-NEXT: or a0, a0, a1 ; CHECK-NEXT: not a0, a0 +; CHECK-NEXT: srli a1, a0, 1 +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: addiw a2, a2, 1365 +; CHECK-NEXT: and a1, a1, a2 ; CHECK-NEXT: slli a0, a0, 49 ; CHECK-NEXT: srli a0, a0, 49 -; CHECK-NEXT: lui a1, %hi(.LCPI0_0) -; CHECK-NEXT: ld a1, %lo(.LCPI0_0)(a1) -; CHECK-NEXT: lui a2, %hi(.LCPI0_1) -; CHECK-NEXT: ld a2, %lo(.LCPI0_1)(a2) -; CHECK-NEXT: srli a3, a0, 1 -; CHECK-NEXT: and a1, a3, a1 ; CHECK-NEXT: sub a0, a0, a1 -; CHECK-NEXT: and a1, a0, a2 +; CHECK-NEXT: lui a1, 209715 +; CHECK-NEXT: addiw a1, a1, 819 +; CHECK-NEXT: slli a2, a1, 32 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: and a2, a0, a1 ; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: lui a1, %hi(.LCPI0_2) -; CHECK-NEXT: ld a1, %lo(.LCPI0_2)(a1) -; CHECK-NEXT: lui a2, %hi(.LCPI0_3) -; CHECK-NEXT: ld a2, %lo(.LCPI0_3)(a2) -; CHECK-NEXT: srli a3, a0, 4 -; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: srli a1, a0, 4 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: lui a1, 61681 +; CHECK-NEXT: addiw a1, a1, -241 +; CHECK-NEXT: slli a2, a1, 32 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: lui a1, 4112 +; CHECK-NEXT: addiw a1, a1, 257 +; CHECK-NEXT: slli a2, a1, 32 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: srli a0, a0, 56 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_2: diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll --- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll @@ -332,24 +332,32 @@ ; RV64I-NEXT: srli a1, a0, 32 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: lui a1, %hi(.LCPI5_0) -; RV64I-NEXT: ld a1, %lo(.LCPI5_0)(a1) -; RV64I-NEXT: lui a2, %hi(.LCPI5_1) -; RV64I-NEXT: ld a2, %lo(.LCPI5_1)(a2) -; RV64I-NEXT: srli a3, a0, 1 -; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a3, a2, 32 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: 
sub a0, a0, a1 -; RV64I-NEXT: and a1, a0, a2 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: and a2, a0, a1 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, %hi(.LCPI5_2) -; RV64I-NEXT: ld a2, %lo(.LCPI5_2)(a2) -; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a1, %hi(.LCPI5_3) -; RV64I-NEXT: ld a1, %lo(.LCPI5_3)(a1) +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -322,24 +322,32 @@ ; RV64I-NEXT: srli a1, a0, 32 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: lui a1, %hi(.LCPI5_0) -; RV64I-NEXT: ld a1, %lo(.LCPI5_0)(a1) -; RV64I-NEXT: lui a2, %hi(.LCPI5_1) -; RV64I-NEXT: ld a2, %lo(.LCPI5_1)(a2) -; RV64I-NEXT: srli a3, a0, 1 -; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a3, a2, 32 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: and a1, a0, a2 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: and a2, a0, a1 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, %hi(.LCPI5_2) -; RV64I-NEXT: ld a2, %lo(.LCPI5_2)(a2) -; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a1, %hi(.LCPI5_3) -; RV64I-NEXT: ld a1, %lo(.LCPI5_3)(a1) +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -617,24 +625,32 @@ ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, %hi(.LCPI13_0) -; RV64I-NEXT: ld a1, %lo(.LCPI13_0)(a1) -; RV64I-NEXT: lui a2, %hi(.LCPI13_1) -; RV64I-NEXT: ld a2, %lo(.LCPI13_1)(a2) -; RV64I-NEXT: srli a3, a0, 1 -; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: slli a3, a2, 32 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: and a1, a0, a2 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: and a2, a0, a1 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, %hi(.LCPI13_2) -; RV64I-NEXT: ld a2, %lo(.LCPI13_2)(a2) -; RV64I-NEXT: add a0, a1, a0 +; 
RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a1, %hi(.LCPI13_3) -; RV64I-NEXT: ld a1, %lo(.LCPI13_3)(a1) +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; RV64I-NEXT: slli a2, a1, 32 +; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll @@ -1063,25 +1063,31 @@ ; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vsll.vx v8, v8, a1 ; RV64-NEXT: vor.vv v8, v11, v8 -; RV64-NEXT: lui a0, %hi(.LCPI18_0) -; RV64-NEXT: ld a0, %lo(.LCPI18_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vsrl.vi v9, v8, 4 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI18_1) -; RV64-NEXT: ld a0, %lo(.LCPI18_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 2 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI18_2) -; RV64-NEXT: ld a0, %lo(.LCPI18_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -1191,25 +1197,31 @@ ; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vsll.vx v8, v8, a1 ; RV64-NEXT: vor.vv v8, v14, v8 -; RV64-NEXT: lui a0, %hi(.LCPI19_0) -; RV64-NEXT: ld a0, %lo(.LCPI19_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vsrl.vi v10, v8, 4 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI19_1) -; RV64-NEXT: ld a0, %lo(.LCPI19_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: vsrl.vi v10, v8, 2 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI19_2) -; RV64-NEXT: ld a0, %lo(.LCPI19_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -1319,25 +1331,31 @@ ; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vsll.vx v8, v8, a1 ; RV64-NEXT: vor.vv v8, v20, v8 -; RV64-NEXT: lui a0, %hi(.LCPI20_0) -; RV64-NEXT: ld a0, %lo(.LCPI20_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: 
vsrl.vi v12, v8, 4 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI20_1) -; RV64-NEXT: ld a0, %lo(.LCPI20_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v12, v8 ; RV64-NEXT: vsrl.vi v12, v8, 2 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI20_2) -; RV64-NEXT: ld a0, %lo(.LCPI20_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v12, v8 ; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -1461,25 +1479,31 @@ ; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vsll.vx v8, v8, a1 ; RV64-NEXT: vor.vv v8, v0, v8 -; RV64-NEXT: lui a0, %hi(.LCPI21_0) -; RV64-NEXT: ld a0, %lo(.LCPI21_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vsrl.vi v16, v8, 4 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI21_1) -; RV64-NEXT: ld a0, %lo(.LCPI21_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 2 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI21_2) -; RV64-NEXT: ld a0, %lo(.LCPI21_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -2073,25 +2073,31 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v11, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI36_0) -; RV64-NEXT: ld a0, %lo(.LCPI36_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v10, v0.t ; RV64-NEXT: vor.vv v8, v9, v8, v0.t ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI36_1) -; RV64-NEXT: ld a0, %lo(.LCPI36_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4, v0.t ; RV64-NEXT: vor.vv v8, v9, v8, v0.t ; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI36_2) -; RV64-NEXT: ld a0, %lo(.LCPI36_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2, v0.t ; RV64-NEXT: vor.vv v8, v9, v8, v0.t ; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; 
RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t @@ -2208,25 +2214,31 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v11 -; RV64-NEXT: lui a0, %hi(.LCPI37_0) -; RV64-NEXT: ld a0, %lo(.LCPI37_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 4 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI37_1) -; RV64-NEXT: ld a0, %lo(.LCPI37_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 2 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI37_2) -; RV64-NEXT: ld a0, %lo(.LCPI37_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -2347,25 +2359,31 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v14, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI38_0) -; RV64-NEXT: ld a0, %lo(.LCPI38_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v12, v0.t ; RV64-NEXT: vor.vv v8, v10, v8, v0.t ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI38_1) -; RV64-NEXT: ld a0, %lo(.LCPI38_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4, v0.t ; RV64-NEXT: vor.vv v8, v10, v8, v0.t ; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI38_2) -; RV64-NEXT: ld a0, %lo(.LCPI38_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2, v0.t ; RV64-NEXT: vor.vv v8, v10, v8, v0.t ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t @@ -2482,25 +2500,31 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v14 -; RV64-NEXT: lui a0, %hi(.LCPI39_0) -; RV64-NEXT: ld a0, %lo(.LCPI39_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: vsrl.vi v10, v8, 4 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI39_1) -; RV64-NEXT: ld a0, %lo(.LCPI39_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: vsrl.vi v10, v8, 2 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vand.vx v8, 
v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI39_2) -; RV64-NEXT: ld a0, %lo(.LCPI39_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -2621,25 +2645,31 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v20, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI40_0) -; RV64-NEXT: ld a0, %lo(.LCPI40_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vor.vv v8, v12, v8, v0.t ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI40_1) -; RV64-NEXT: ld a0, %lo(.LCPI40_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4, v0.t ; RV64-NEXT: vor.vv v8, v12, v8, v0.t ; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI40_2) -; RV64-NEXT: ld a0, %lo(.LCPI40_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2, v0.t ; RV64-NEXT: vor.vv v8, v12, v8, v0.t ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t @@ -2756,25 +2786,31 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v20 -; RV64-NEXT: lui a0, %hi(.LCPI41_0) -; RV64-NEXT: ld a0, %lo(.LCPI41_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vor.vv v8, v12, v8 ; RV64-NEXT: vsrl.vi v12, v8, 4 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI41_1) -; RV64-NEXT: ld a0, %lo(.LCPI41_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v12, v8 ; RV64-NEXT: vsrl.vi v12, v8, 2 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI41_2) -; RV64-NEXT: ld a0, %lo(.LCPI41_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v12, v8 ; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -2953,27 +2989,33 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI42_0) -; RV64-NEXT: ld a0, %lo(.LCPI42_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v24, v0.t -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vor.vv v16, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v8, v16, 4, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vor.vv v8, v16, v8, v0.t 
+; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI42_1) -; RV64-NEXT: ld a0, %lo(.LCPI42_1)(a0) -; RV64-NEXT: vsll.vi v16, v16, 4, v0.t -; RV64-NEXT: vor.vv v16, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v8, v16, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vsll.vi v8, v8, 4, v0.t +; RV64-NEXT: vor.vv v8, v16, v8, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI42_2) -; RV64-NEXT: ld a0, %lo(.LCPI42_2)(a0) -; RV64-NEXT: vsll.vi v16, v16, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vsll.vi v8, v8, 2, v0.t +; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t @@ -3108,25 +3150,31 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v0 -; RV64-NEXT: lui a0, %hi(.LCPI43_0) -; RV64-NEXT: ld a0, %lo(.LCPI43_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 4 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI43_1) -; RV64-NEXT: ld a0, %lo(.LCPI43_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 2 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI43_2) -; RV64-NEXT: ld a0, %lo(.LCPI43_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -3305,27 +3353,33 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI44_0) -; RV64-NEXT: ld a0, %lo(.LCPI44_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v24, v0.t -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vor.vv v16, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v8, v16, 4, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vor.vv v8, v16, v8, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI44_1) -; RV64-NEXT: ld a0, %lo(.LCPI44_1)(a0) -; RV64-NEXT: vsll.vi v16, v16, 4, v0.t -; RV64-NEXT: vor.vv v16, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v8, v16, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vsll.vi v8, v8, 4, v0.t +; RV64-NEXT: 
vor.vv v8, v16, v8, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI44_2) -; RV64-NEXT: ld a0, %lo(.LCPI44_2)(a0) -; RV64-NEXT: vsll.vi v16, v16, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vsll.vi v8, v8, 2, v0.t +; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t @@ -3460,25 +3514,31 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v0 -; RV64-NEXT: lui a0, %hi(.LCPI45_0) -; RV64-NEXT: ld a0, %lo(.LCPI45_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 4 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI45_1) -; RV64-NEXT: ld a0, %lo(.LCPI45_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 2 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI45_2) -; RV64-NEXT: ld a0, %lo(.LCPI45_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll @@ -1488,25 +1488,33 @@ ; RV64I-NEXT: vsrl.vx v9, v8, a0 ; RV64I-NEXT: vor.vv v8, v8, v9 ; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI18_0) -; RV64I-NEXT: ld a0, %lo(.LCPI18_0)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI18_1) -; RV64I-NEXT: ld a1, %lo(.LCPI18_1)(a1) ; RV64I-NEXT: vsrl.vi v9, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v9, v9, a0 ; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: vand.vx v9, v8, a1 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vand.vx v9, v8, a0 ; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vand.vx v8, v8, a0 ; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI18_2) -; RV64I-NEXT: ld a0, %lo(.LCPI18_2)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI18_3) -; RV64I-NEXT: ld a1, %lo(.LCPI18_3)(a1) ; RV64I-NEXT: vsrl.vi v9, v8, 4 ; RV64I-NEXT: vadd.vv v8, v8, v9 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 
+; RV64I-NEXT: vmul.vx v8, v8, a0 ; RV64I-NEXT: li a0, 56 ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret @@ -1620,25 +1628,33 @@ ; RV64I-NEXT: vsrl.vx v10, v8, a0 ; RV64I-NEXT: vor.vv v8, v8, v10 ; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI19_0) -; RV64I-NEXT: ld a0, %lo(.LCPI19_0)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI19_1) -; RV64I-NEXT: ld a1, %lo(.LCPI19_1)(a1) ; RV64I-NEXT: vsrl.vi v10, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v10, v10, a0 ; RV64I-NEXT: vsub.vv v8, v8, v10 -; RV64I-NEXT: vand.vx v10, v8, a1 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vand.vx v10, v8, a0 ; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vand.vx v8, v8, a0 ; RV64I-NEXT: vadd.vv v8, v10, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI19_2) -; RV64I-NEXT: ld a0, %lo(.LCPI19_2)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI19_3) -; RV64I-NEXT: ld a1, %lo(.LCPI19_3)(a1) ; RV64I-NEXT: vsrl.vi v10, v8, 4 ; RV64I-NEXT: vadd.vv v8, v8, v10 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vmul.vx v8, v8, a0 ; RV64I-NEXT: li a0, 56 ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret @@ -1752,25 +1768,33 @@ ; RV64I-NEXT: vsrl.vx v12, v8, a0 ; RV64I-NEXT: vor.vv v8, v8, v12 ; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI20_0) -; RV64I-NEXT: ld a0, %lo(.LCPI20_0)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI20_1) -; RV64I-NEXT: ld a1, %lo(.LCPI20_1)(a1) ; RV64I-NEXT: vsrl.vi v12, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v12, v12, a0 ; RV64I-NEXT: vsub.vv v8, v8, v12 -; RV64I-NEXT: vand.vx v12, v8, a1 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vand.vx v12, v8, a0 ; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vand.vx v8, v8, a0 ; RV64I-NEXT: vadd.vv v8, v12, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI20_2) -; RV64I-NEXT: ld a0, %lo(.LCPI20_2)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI20_3) -; RV64I-NEXT: ld a1, %lo(.LCPI20_3)(a1) ; RV64I-NEXT: vsrl.vi v12, v8, 4 ; RV64I-NEXT: vadd.vv v8, v8, v12 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vmul.vx v8, v8, a0 ; RV64I-NEXT: li a0, 56 ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret @@ -1884,25 +1908,33 @@ ; RV64I-NEXT: vsrl.vx v16, v8, a0 ; RV64I-NEXT: vor.vv v8, v8, v16 ; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI21_0) -; RV64I-NEXT: ld a0, %lo(.LCPI21_0)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI21_1) -; RV64I-NEXT: ld a1, %lo(.LCPI21_1)(a1) ; RV64I-NEXT: vsrl.vi v16, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v16, v16, a0 ; RV64I-NEXT: 
vsub.vv v8, v8, v16 -; RV64I-NEXT: vand.vx v16, v8, a1 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vand.vx v16, v8, a0 ; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vand.vx v8, v8, a0 ; RV64I-NEXT: vadd.vv v8, v16, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI21_2) -; RV64I-NEXT: ld a0, %lo(.LCPI21_2)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI21_3) -; RV64I-NEXT: ld a1, %lo(.LCPI21_3)(a1) ; RV64I-NEXT: vsrl.vi v16, v8, 4 ; RV64I-NEXT: vadd.vv v8, v8, v16 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vmul.vx v8, v8, a0 ; RV64I-NEXT: li a0, 56 ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret @@ -3346,25 +3378,33 @@ ; RV64I-NEXT: vsrl.vx v9, v8, a0 ; RV64I-NEXT: vor.vv v8, v8, v9 ; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI40_0) -; RV64I-NEXT: ld a0, %lo(.LCPI40_0)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI40_1) -; RV64I-NEXT: ld a1, %lo(.LCPI40_1)(a1) ; RV64I-NEXT: vsrl.vi v9, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v9, v9, a0 ; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: vand.vx v9, v8, a1 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vand.vx v9, v8, a0 ; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vand.vx v8, v8, a0 ; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI40_2) -; RV64I-NEXT: ld a0, %lo(.LCPI40_2)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI40_3) -; RV64I-NEXT: ld a1, %lo(.LCPI40_3)(a1) ; RV64I-NEXT: vsrl.vi v9, v8, 4 ; RV64I-NEXT: vadd.vv v8, v8, v9 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vmul.vx v8, v8, a0 ; RV64I-NEXT: li a0, 56 ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret @@ -3473,25 +3513,33 @@ ; RV64I-NEXT: vsrl.vx v10, v8, a0 ; RV64I-NEXT: vor.vv v8, v8, v10 ; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI41_0) -; RV64I-NEXT: ld a0, %lo(.LCPI41_0)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI41_1) -; RV64I-NEXT: ld a1, %lo(.LCPI41_1)(a1) ; RV64I-NEXT: vsrl.vi v10, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v10, v10, a0 ; RV64I-NEXT: vsub.vv v8, v8, v10 -; RV64I-NEXT: vand.vx v10, v8, a1 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vand.vx v10, v8, a0 ; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vand.vx v8, v8, a0 ; RV64I-NEXT: vadd.vv v8, v10, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI41_2) -; RV64I-NEXT: ld a0, %lo(.LCPI41_2)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI41_3) -; RV64I-NEXT: ld a1, %lo(.LCPI41_3)(a1) ; RV64I-NEXT: vsrl.vi v10, v8, 4 ; RV64I-NEXT: vadd.vv v8, v8, v10 +; RV64I-NEXT: lui a0, 61681 +; 
RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vmul.vx v8, v8, a0 ; RV64I-NEXT: li a0, 56 ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret @@ -3600,25 +3648,33 @@ ; RV64I-NEXT: vsrl.vx v12, v8, a0 ; RV64I-NEXT: vor.vv v8, v8, v12 ; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI42_0) -; RV64I-NEXT: ld a0, %lo(.LCPI42_0)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI42_1) -; RV64I-NEXT: ld a1, %lo(.LCPI42_1)(a1) ; RV64I-NEXT: vsrl.vi v12, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v12, v12, a0 ; RV64I-NEXT: vsub.vv v8, v8, v12 -; RV64I-NEXT: vand.vx v12, v8, a1 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vand.vx v12, v8, a0 ; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vand.vx v8, v8, a0 ; RV64I-NEXT: vadd.vv v8, v12, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI42_2) -; RV64I-NEXT: ld a0, %lo(.LCPI42_2)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI42_3) -; RV64I-NEXT: ld a1, %lo(.LCPI42_3)(a1) ; RV64I-NEXT: vsrl.vi v12, v8, 4 ; RV64I-NEXT: vadd.vv v8, v8, v12 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vmul.vx v8, v8, a0 ; RV64I-NEXT: li a0, 56 ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret @@ -3727,25 +3783,33 @@ ; RV64I-NEXT: vsrl.vx v16, v8, a0 ; RV64I-NEXT: vor.vv v8, v8, v16 ; RV64I-NEXT: vnot.v v8, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI43_0) -; RV64I-NEXT: ld a0, %lo(.LCPI43_0)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI43_1) -; RV64I-NEXT: ld a1, %lo(.LCPI43_1)(a1) ; RV64I-NEXT: vsrl.vi v16, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v16, v16, a0 ; RV64I-NEXT: vsub.vv v8, v8, v16 -; RV64I-NEXT: vand.vx v16, v8, a1 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vand.vx v16, v8, a0 ; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vand.vx v8, v8, a0 ; RV64I-NEXT: vadd.vv v8, v16, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI43_2) -; RV64I-NEXT: ld a0, %lo(.LCPI43_2)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI43_3) -; RV64I-NEXT: ld a1, %lo(.LCPI43_3)(a1) ; RV64I-NEXT: vsrl.vi v16, v8, 4 ; RV64I-NEXT: vadd.vv v8, v8, v16 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vmul.vx v8, v8, a0 ; RV64I-NEXT: li a0, 56 ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll @@ -2152,25 +2152,33 @@ 
; RV64-NEXT: vsrl.vx v9, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v9, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI36_0) -; RV64-NEXT: ld a0, %lo(.LCPI36_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI36_1) -; RV64-NEXT: ld a1, %lo(.LCPI36_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: vand.vx v9, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI36_2) -; RV64-NEXT: ld a0, %lo(.LCPI36_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI36_3) -; RV64-NEXT: ld a1, %lo(.LCPI36_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -2260,25 +2268,33 @@ ; RV64-NEXT: vsrl.vx v9, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI37_0) -; RV64-NEXT: ld a0, %lo(.LCPI37_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI37_1) -; RV64-NEXT: ld a1, %lo(.LCPI37_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: lui a0, %hi(.LCPI37_2) -; RV64-NEXT: ld a0, %lo(.LCPI37_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI37_3) -; RV64-NEXT: ld a1, %lo(.LCPI37_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2372,25 +2388,33 @@ ; RV64-NEXT: vsrl.vx v10, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v10, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI38_0) -; RV64-NEXT: ld a0, %lo(.LCPI38_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI38_1) -; RV64-NEXT: ld a1, %lo(.LCPI38_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 
819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI38_2) -; RV64-NEXT: ld a0, %lo(.LCPI38_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI38_3) -; RV64-NEXT: ld a1, %lo(.LCPI38_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -2480,25 +2504,33 @@ ; RV64-NEXT: vsrl.vx v10, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI39_0) -; RV64-NEXT: ld a0, %lo(.LCPI39_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI39_1) -; RV64-NEXT: ld a1, %lo(.LCPI39_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: lui a0, %hi(.LCPI39_2) -; RV64-NEXT: ld a0, %lo(.LCPI39_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI39_3) -; RV64-NEXT: ld a1, %lo(.LCPI39_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2592,25 +2624,33 @@ ; RV64-NEXT: vsrl.vx v12, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v12, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI40_0) -; RV64-NEXT: ld a0, %lo(.LCPI40_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI40_1) -; RV64-NEXT: ld a1, %lo(.LCPI40_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: vand.vx v12, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI40_2) -; RV64-NEXT: ld a0, %lo(.LCPI40_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI40_3) -; RV64-NEXT: ld a1, %lo(.LCPI40_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 
32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -2700,25 +2740,33 @@ ; RV64-NEXT: vsrl.vx v12, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI41_0) -; RV64-NEXT: ld a0, %lo(.LCPI41_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI41_1) -; RV64-NEXT: ld a1, %lo(.LCPI41_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: lui a0, %hi(.LCPI41_2) -; RV64-NEXT: ld a0, %lo(.LCPI41_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI41_3) -; RV64-NEXT: ld a1, %lo(.LCPI41_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2812,25 +2860,33 @@ ; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI42_0) -; RV64-NEXT: ld a0, %lo(.LCPI42_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI42_1) -; RV64-NEXT: ld a1, %lo(.LCPI42_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI42_2) -; RV64-NEXT: ld a0, %lo(.LCPI42_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI42_3) -; RV64-NEXT: ld a1, %lo(.LCPI42_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -2920,25 +2976,33 @@ ; RV64-NEXT: vsrl.vx v16, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI43_0) -; RV64-NEXT: ld a0, %lo(.LCPI43_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI43_1) -; 
RV64-NEXT: ld a1, %lo(.LCPI43_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI43_2) -; RV64-NEXT: ld a0, %lo(.LCPI43_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI43_3) -; RV64-NEXT: ld a1, %lo(.LCPI43_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3032,25 +3096,33 @@ ; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI44_0) -; RV64-NEXT: ld a0, %lo(.LCPI44_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI44_1) -; RV64-NEXT: ld a1, %lo(.LCPI44_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI44_2) -; RV64-NEXT: ld a0, %lo(.LCPI44_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI44_3) -; RV64-NEXT: ld a1, %lo(.LCPI44_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -3140,25 +3212,33 @@ ; RV64-NEXT: vsrl.vx v16, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI45_0) -; RV64-NEXT: ld a0, %lo(.LCPI45_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI45_1) -; RV64-NEXT: ld a1, %lo(.LCPI45_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 
-; RV64-NEXT: lui a0, %hi(.LCPI45_2) -; RV64-NEXT: ld a0, %lo(.LCPI45_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI45_3) -; RV64-NEXT: ld a1, %lo(.LCPI45_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3427,24 +3507,32 @@ ; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v16, v8, v0.t -; RV64-NEXT: lui a3, %hi(.LCPI46_0) -; RV64-NEXT: ld a4, %lo(.LCPI46_0)(a3) -; RV64-NEXT: lui a3, %hi(.LCPI46_1) -; RV64-NEXT: ld a3, %lo(.LCPI46_1)(a3) ; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV64-NEXT: vand.vx v8, v8, a4, v0.t -; RV64-NEXT: vsub.vv v8, v16, v8, v0.t -; RV64-NEXT: vand.vx v16, v8, a3, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV64-NEXT: lui a3, 349525 +; RV64-NEXT: addiw a3, a3, 1365 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vadd.vv v16, v16, v8, v0.t -; RV64-NEXT: lui a5, %hi(.LCPI46_2) -; RV64-NEXT: ld a5, %lo(.LCPI46_2)(a5) -; RV64-NEXT: lui a6, %hi(.LCPI46_3) -; RV64-NEXT: ld a6, %lo(.LCPI46_3)(a6) -; RV64-NEXT: vsrl.vi v8, v16, 4, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t +; RV64-NEXT: vsub.vv v16, v16, v8, v0.t +; RV64-NEXT: lui a4, 209715 +; RV64-NEXT: addiw a4, a4, 819 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 +; RV64-NEXT: vand.vx v8, v16, a4, v0.t +; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV64-NEXT: vand.vx v16, v16, a4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a5, 61681 +; RV64-NEXT: addiw a5, a5, -241 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: vand.vx v8, v8, a5, v0.t +; RV64-NEXT: lui a6, 4112 +; RV64-NEXT: addiw a6, a6, 257 +; RV64-NEXT: slli a7, a6, 32 +; RV64-NEXT: add a6, a6, a7 ; RV64-NEXT: vmul.vx v8, v8, a6, v0.t ; RV64-NEXT: li a7, 56 ; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t @@ -3475,11 +3563,11 @@ ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a4, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a4, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t @@ -3662,24 +3750,32 @@ ; RV64-NEXT: vsrl.vx v24, v16, a2 ; RV64-NEXT: vor.vv v16, v16, v24 ; RV64-NEXT: vnot.v v16, v16 -; RV64-NEXT: lui a3, %hi(.LCPI47_0) -; RV64-NEXT: ld a3, %lo(.LCPI47_0)(a3) -; RV64-NEXT: lui a4, %hi(.LCPI47_1) -; RV64-NEXT: ld a4, %lo(.LCPI47_1)(a4) ; RV64-NEXT: vsrl.vi v24, v16, 1 +; RV64-NEXT: lui a3, 349525 +; RV64-NEXT: addiw a3, a3, 1365 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: vand.vx v24, v24, a3 ; RV64-NEXT: vsub.vv v16, v16, v24 +; RV64-NEXT: lui a4, 209715 +; RV64-NEXT: addiw a4, a4, 819 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vand.vx v24, v16, a4 ; RV64-NEXT: 
vsrl.vi v16, v16, 2 ; RV64-NEXT: vand.vx v16, v16, a4 ; RV64-NEXT: vadd.vv v16, v24, v16 -; RV64-NEXT: lui a5, %hi(.LCPI47_2) -; RV64-NEXT: ld a5, %lo(.LCPI47_2)(a5) -; RV64-NEXT: lui a6, %hi(.LCPI47_3) -; RV64-NEXT: ld a6, %lo(.LCPI47_3)(a6) ; RV64-NEXT: vsrl.vi v24, v16, 4 ; RV64-NEXT: vadd.vv v16, v16, v24 +; RV64-NEXT: lui a5, 61681 +; RV64-NEXT: addiw a5, a5, -241 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: vand.vx v16, v16, a5 +; RV64-NEXT: lui a6, 4112 +; RV64-NEXT: addiw a6, a6, 257 +; RV64-NEXT: slli a7, a6, 32 +; RV64-NEXT: add a6, a6, a7 ; RV64-NEXT: vmul.vx v16, v16, a6 ; RV64-NEXT: li a7, 56 ; RV64-NEXT: vsrl.vx v16, v16, a7 @@ -5848,25 +5944,33 @@ ; RV64-NEXT: vsrl.vx v9, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v9, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI84_0) -; RV64-NEXT: ld a0, %lo(.LCPI84_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI84_1) -; RV64-NEXT: ld a1, %lo(.LCPI84_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: vand.vx v9, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI84_2) -; RV64-NEXT: ld a0, %lo(.LCPI84_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI84_3) -; RV64-NEXT: ld a1, %lo(.LCPI84_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -5956,25 +6060,33 @@ ; RV64-NEXT: vsrl.vx v9, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI85_0) -; RV64-NEXT: ld a0, %lo(.LCPI85_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI85_1) -; RV64-NEXT: ld a1, %lo(.LCPI85_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: lui a0, %hi(.LCPI85_2) -; RV64-NEXT: ld a0, %lo(.LCPI85_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI85_3) -; RV64-NEXT: ld a1, %lo(.LCPI85_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, 
a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -6067,25 +6179,33 @@ ; RV64-NEXT: vsrl.vx v10, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v10, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI86_0) -; RV64-NEXT: ld a0, %lo(.LCPI86_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI86_1) -; RV64-NEXT: ld a1, %lo(.LCPI86_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI86_2) -; RV64-NEXT: ld a0, %lo(.LCPI86_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI86_3) -; RV64-NEXT: ld a1, %lo(.LCPI86_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -6175,25 +6295,33 @@ ; RV64-NEXT: vsrl.vx v10, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI87_0) -; RV64-NEXT: ld a0, %lo(.LCPI87_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI87_1) -; RV64-NEXT: ld a1, %lo(.LCPI87_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: lui a0, %hi(.LCPI87_2) -; RV64-NEXT: ld a0, %lo(.LCPI87_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI87_3) -; RV64-NEXT: ld a1, %lo(.LCPI87_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -6286,25 +6414,33 @@ ; RV64-NEXT: vsrl.vx v12, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v12, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI88_0) -; RV64-NEXT: ld a0, %lo(.LCPI88_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI88_1) -; RV64-NEXT: ld a1, %lo(.LCPI88_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0, v0.t ; RV64-NEXT: 
vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: vand.vx v12, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI88_2) -; RV64-NEXT: ld a0, %lo(.LCPI88_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI88_3) -; RV64-NEXT: ld a1, %lo(.LCPI88_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -6394,25 +6530,33 @@ ; RV64-NEXT: vsrl.vx v12, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI89_0) -; RV64-NEXT: ld a0, %lo(.LCPI89_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI89_1) -; RV64-NEXT: ld a1, %lo(.LCPI89_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: lui a0, %hi(.LCPI89_2) -; RV64-NEXT: ld a0, %lo(.LCPI89_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI89_3) -; RV64-NEXT: ld a1, %lo(.LCPI89_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -6505,25 +6649,33 @@ ; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI90_0) -; RV64-NEXT: ld a0, %lo(.LCPI90_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI90_1) -; RV64-NEXT: ld a1, %lo(.LCPI90_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI90_2) -; RV64-NEXT: ld a0, %lo(.LCPI90_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI90_3) -; RV64-NEXT: ld a1, %lo(.LCPI90_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; 
RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -6613,25 +6765,33 @@ ; RV64-NEXT: vsrl.vx v16, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI91_0) -; RV64-NEXT: ld a0, %lo(.LCPI91_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI91_1) -; RV64-NEXT: ld a1, %lo(.LCPI91_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI91_2) -; RV64-NEXT: ld a0, %lo(.LCPI91_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI91_3) -; RV64-NEXT: ld a1, %lo(.LCPI91_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -6724,25 +6884,33 @@ ; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI92_0) -; RV64-NEXT: ld a0, %lo(.LCPI92_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI92_1) -; RV64-NEXT: ld a1, %lo(.LCPI92_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI92_2) -; RV64-NEXT: ld a0, %lo(.LCPI92_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI92_3) -; RV64-NEXT: ld a1, %lo(.LCPI92_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -6832,25 +7000,33 @@ ; RV64-NEXT: vsrl.vx v16, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: 
vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI93_0) -; RV64-NEXT: ld a0, %lo(.LCPI93_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI93_1) -; RV64-NEXT: ld a1, %lo(.LCPI93_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI93_2) -; RV64-NEXT: ld a0, %lo(.LCPI93_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI93_3) -; RV64-NEXT: ld a1, %lo(.LCPI93_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -7117,24 +7293,32 @@ ; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v16, v8, v0.t -; RV64-NEXT: lui a3, %hi(.LCPI94_0) -; RV64-NEXT: ld a4, %lo(.LCPI94_0)(a3) -; RV64-NEXT: lui a3, %hi(.LCPI94_1) -; RV64-NEXT: ld a3, %lo(.LCPI94_1)(a3) ; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV64-NEXT: vand.vx v8, v8, a4, v0.t -; RV64-NEXT: vsub.vv v8, v16, v8, v0.t -; RV64-NEXT: vand.vx v16, v8, a3, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV64-NEXT: lui a3, 349525 +; RV64-NEXT: addiw a3, a3, 1365 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vadd.vv v16, v16, v8, v0.t -; RV64-NEXT: lui a5, %hi(.LCPI94_2) -; RV64-NEXT: ld a5, %lo(.LCPI94_2)(a5) -; RV64-NEXT: lui a6, %hi(.LCPI94_3) -; RV64-NEXT: ld a6, %lo(.LCPI94_3)(a6) -; RV64-NEXT: vsrl.vi v8, v16, 4, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t +; RV64-NEXT: vsub.vv v16, v16, v8, v0.t +; RV64-NEXT: lui a4, 209715 +; RV64-NEXT: addiw a4, a4, 819 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 +; RV64-NEXT: vand.vx v8, v16, a4, v0.t +; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV64-NEXT: vand.vx v16, v16, a4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a5, 61681 +; RV64-NEXT: addiw a5, a5, -241 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: vand.vx v8, v8, a5, v0.t +; RV64-NEXT: lui a6, 4112 +; RV64-NEXT: addiw a6, a6, 257 +; RV64-NEXT: slli a7, a6, 32 +; RV64-NEXT: add a6, a6, a7 ; RV64-NEXT: vmul.vx v8, v8, a6, v0.t ; RV64-NEXT: li a7, 56 ; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t @@ -7165,11 +7349,11 @@ ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a4, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a4, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; 
RV64-NEXT: vadd.vv v8, v8, v16, v0.t @@ -7352,24 +7536,32 @@ ; RV64-NEXT: vsrl.vx v24, v16, a2 ; RV64-NEXT: vor.vv v16, v16, v24 ; RV64-NEXT: vnot.v v16, v16 -; RV64-NEXT: lui a3, %hi(.LCPI95_0) -; RV64-NEXT: ld a3, %lo(.LCPI95_0)(a3) -; RV64-NEXT: lui a4, %hi(.LCPI95_1) -; RV64-NEXT: ld a4, %lo(.LCPI95_1)(a4) ; RV64-NEXT: vsrl.vi v24, v16, 1 +; RV64-NEXT: lui a3, 349525 +; RV64-NEXT: addiw a3, a3, 1365 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: vand.vx v24, v24, a3 ; RV64-NEXT: vsub.vv v16, v16, v24 +; RV64-NEXT: lui a4, 209715 +; RV64-NEXT: addiw a4, a4, 819 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vand.vx v24, v16, a4 ; RV64-NEXT: vsrl.vi v16, v16, 2 ; RV64-NEXT: vand.vx v16, v16, a4 ; RV64-NEXT: vadd.vv v16, v24, v16 -; RV64-NEXT: lui a5, %hi(.LCPI95_2) -; RV64-NEXT: ld a5, %lo(.LCPI95_2)(a5) -; RV64-NEXT: lui a6, %hi(.LCPI95_3) -; RV64-NEXT: ld a6, %lo(.LCPI95_3)(a6) ; RV64-NEXT: vsrl.vi v24, v16, 4 ; RV64-NEXT: vadd.vv v16, v16, v24 +; RV64-NEXT: lui a5, 61681 +; RV64-NEXT: addiw a5, a5, -241 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: vand.vx v16, v16, a5 +; RV64-NEXT: lui a6, 4112 +; RV64-NEXT: addiw a6, a6, 257 +; RV64-NEXT: slli a7, a6, 32 +; RV64-NEXT: add a6, a6, a7 ; RV64-NEXT: vmul.vx v16, v16, a6 ; RV64-NEXT: li a7, 56 ; RV64-NEXT: vsrl.vx v16, v16, a7 diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll @@ -795,25 +795,33 @@ ; RV64-LABEL: ctpop_nxv1i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI18_0) -; RV64-NEXT: ld a0, %lo(.LCPI18_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI18_1) -; RV64-NEXT: ld a1, %lo(.LCPI18_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: lui a0, %hi(.LCPI18_2) -; RV64-NEXT: ld a0, %lo(.LCPI18_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI18_3) -; RV64-NEXT: ld a1, %lo(.LCPI18_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -868,25 +876,33 @@ ; RV64-LABEL: ctpop_nxv2i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI19_0) -; RV64-NEXT: ld a0, %lo(.LCPI19_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI19_1) -; RV64-NEXT: ld a1, %lo(.LCPI19_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: lui a0, 
209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: lui a0, %hi(.LCPI19_2) -; RV64-NEXT: ld a0, %lo(.LCPI19_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI19_3) -; RV64-NEXT: ld a1, %lo(.LCPI19_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -941,25 +957,33 @@ ; RV64-LABEL: ctpop_nxv4i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI20_0) -; RV64-NEXT: ld a0, %lo(.LCPI20_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI20_1) -; RV64-NEXT: ld a1, %lo(.LCPI20_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: lui a0, %hi(.LCPI20_2) -; RV64-NEXT: ld a0, %lo(.LCPI20_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI20_3) -; RV64-NEXT: ld a1, %lo(.LCPI20_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1014,25 +1038,33 @@ ; RV64-LABEL: ctpop_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI21_0) -; RV64-NEXT: ld a0, %lo(.LCPI21_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI21_1) -; RV64-NEXT: ld a1, %lo(.LCPI21_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI21_2) -; RV64-NEXT: ld a0, %lo(.LCPI21_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI21_3) -; RV64-NEXT: ld a1, %lo(.LCPI21_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui 
a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll @@ -1590,25 +1590,33 @@ ; RV64-LABEL: vp_ctpop_nxv1i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI36_0) -; RV64-NEXT: ld a0, %lo(.LCPI36_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI36_1) -; RV64-NEXT: ld a1, %lo(.LCPI36_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: vand.vx v9, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI36_2) -; RV64-NEXT: ld a0, %lo(.LCPI36_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI36_3) -; RV64-NEXT: ld a1, %lo(.LCPI36_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -1670,25 +1678,33 @@ ; RV64-LABEL: vp_ctpop_nxv1i64_unmasked: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI37_0) -; RV64-NEXT: ld a0, %lo(.LCPI37_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI37_1) -; RV64-NEXT: ld a1, %lo(.LCPI37_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: lui a0, %hi(.LCPI37_2) -; RV64-NEXT: ld a0, %lo(.LCPI37_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI37_3) -; RV64-NEXT: ld a1, %lo(.LCPI37_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1754,25 +1770,33 @@ ; RV64-LABEL: vp_ctpop_nxv2i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI38_0) -; RV64-NEXT: ld a0, %lo(.LCPI38_0)(a0) -; 
RV64-NEXT: lui a1, %hi(.LCPI38_1) -; RV64-NEXT: ld a1, %lo(.LCPI38_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI38_2) -; RV64-NEXT: ld a0, %lo(.LCPI38_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI38_3) -; RV64-NEXT: ld a1, %lo(.LCPI38_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -1834,25 +1858,33 @@ ; RV64-LABEL: vp_ctpop_nxv2i64_unmasked: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI39_0) -; RV64-NEXT: ld a0, %lo(.LCPI39_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI39_1) -; RV64-NEXT: ld a1, %lo(.LCPI39_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: lui a0, %hi(.LCPI39_2) -; RV64-NEXT: ld a0, %lo(.LCPI39_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI39_3) -; RV64-NEXT: ld a1, %lo(.LCPI39_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1918,25 +1950,33 @@ ; RV64-LABEL: vp_ctpop_nxv4i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI40_0) -; RV64-NEXT: ld a0, %lo(.LCPI40_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI40_1) -; RV64-NEXT: ld a1, %lo(.LCPI40_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: vand.vx v12, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, 
v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI40_2) -; RV64-NEXT: ld a0, %lo(.LCPI40_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI40_3) -; RV64-NEXT: ld a1, %lo(.LCPI40_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -1998,25 +2038,33 @@ ; RV64-LABEL: vp_ctpop_nxv4i64_unmasked: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI41_0) -; RV64-NEXT: ld a0, %lo(.LCPI41_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI41_1) -; RV64-NEXT: ld a1, %lo(.LCPI41_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: lui a0, %hi(.LCPI41_2) -; RV64-NEXT: ld a0, %lo(.LCPI41_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI41_3) -; RV64-NEXT: ld a1, %lo(.LCPI41_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2082,25 +2130,33 @@ ; RV64-LABEL: vp_ctpop_nxv7i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI42_0) -; RV64-NEXT: ld a0, %lo(.LCPI42_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI42_1) -; RV64-NEXT: ld a1, %lo(.LCPI42_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI42_2) -; RV64-NEXT: ld a0, %lo(.LCPI42_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI42_3) -; RV64-NEXT: ld a1, %lo(.LCPI42_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 
+; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -2162,25 +2218,33 @@ ; RV64-LABEL: vp_ctpop_nxv7i64_unmasked: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI43_0) -; RV64-NEXT: ld a0, %lo(.LCPI43_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI43_1) -; RV64-NEXT: ld a1, %lo(.LCPI43_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI43_2) -; RV64-NEXT: ld a0, %lo(.LCPI43_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI43_3) -; RV64-NEXT: ld a1, %lo(.LCPI43_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2246,25 +2310,33 @@ ; RV64-LABEL: vp_ctpop_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI44_0) -; RV64-NEXT: ld a0, %lo(.LCPI44_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI44_1) -; RV64-NEXT: ld a1, %lo(.LCPI44_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI44_2) -; RV64-NEXT: ld a0, %lo(.LCPI44_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI44_3) -; RV64-NEXT: ld a1, %lo(.LCPI44_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -2326,25 +2398,33 @@ ; RV64-LABEL: vp_ctpop_nxv8i64_unmasked: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI45_0) -; RV64-NEXT: ld a0, %lo(.LCPI45_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI45_1) -; RV64-NEXT: ld a1, %lo(.LCPI45_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, 
a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI45_2) -; RV64-NEXT: ld a0, %lo(.LCPI45_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI45_3) -; RV64-NEXT: ld a1, %lo(.LCPI45_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2551,69 +2631,77 @@ ; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: sub sp, sp, a1 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: srli a2, a1, 3 ; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a2 -; RV64-NEXT: sub a2, a0, a1 -; RV64-NEXT: sltu a3, a0, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 +; RV64-NEXT: vslidedown.vx v24, v0, a2 +; RV64-NEXT: mv a2, a0 +; RV64-NEXT: bltu a0, a1, .LBB46_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: .LBB46_2: ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: lui a2, %hi(.LCPI46_0) -; RV64-NEXT: ld a3, %lo(.LCPI46_0)(a2) -; RV64-NEXT: lui a2, %hi(.LCPI46_1) -; RV64-NEXT: ld a2, %lo(.LCPI46_1)(a2) -; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vsub.vv v8, v16, v8, v0.t -; RV64-NEXT: vand.vx v16, v8, a2, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a2, 349525 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: vand.vx v16, v16, a2, v0.t +; RV64-NEXT: vsub.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vadd.vv v16, v16, v8, v0.t -; RV64-NEXT: lui a4, %hi(.LCPI46_2) -; RV64-NEXT: ld a4, %lo(.LCPI46_2)(a4) -; RV64-NEXT: lui a5, %hi(.LCPI46_3) -; RV64-NEXT: ld a5, %lo(.LCPI46_3)(a5) -; RV64-NEXT: vsrl.vi v8, v16, 4, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a4, 61681 +; RV64-NEXT: addiw a4, a4, -241 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vand.vx v8, v8, a4, v0.t +; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: vmul.vx v8, v8, a5, v0.t ; RV64-NEXT: li a6, 56 ; RV64-NEXT: vsrl.vx v8, v8, a6, 
v0.t ; RV64-NEXT: addi a7, sp, 16 ; RV64-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill -; RV64-NEXT: bltu a0, a1, .LBB46_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB46_2: +; RV64-NEXT: sub a1, a0, a1 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vsub.vv v8, v16, v8, v0.t -; RV64-NEXT: vand.vx v16, v8, a2, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t +; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: vand.vx v16, v16, a2, v0.t +; RV64-NEXT: vsub.vv v16, v8, v16, v0.t +; RV64-NEXT: vand.vx v8, v16, a3, v0.t +; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vadd.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t ; RV64-NEXT: vand.vx v8, v8, a4, v0.t ; RV64-NEXT: vmul.vx v8, v8, a5, v0.t -; RV64-NEXT: vsrl.vx v8, v8, a6, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a6, v0.t ; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 @@ -2742,49 +2830,58 @@ ; RV64-LABEL: vp_ctpop_nxv16i64_unmasked: ; RV64: # %bb.0: ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: sub a2, a0, a1 -; RV64-NEXT: sltu a3, a0, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: lui a2, %hi(.LCPI47_0) -; RV64-NEXT: ld a2, %lo(.LCPI47_0)(a2) -; RV64-NEXT: lui a3, %hi(.LCPI47_1) -; RV64-NEXT: ld a3, %lo(.LCPI47_1)(a3) -; RV64-NEXT: vsrl.vi v24, v16, 1 -; RV64-NEXT: vand.vx v24, v24, a2 -; RV64-NEXT: vsub.vv v16, v16, v24 -; RV64-NEXT: vand.vx v24, v16, a3 -; RV64-NEXT: vsrl.vi v16, v16, 2 -; RV64-NEXT: vand.vx v16, v16, a3 -; RV64-NEXT: vadd.vv v16, v24, v16 -; RV64-NEXT: lui a4, %hi(.LCPI47_2) -; RV64-NEXT: ld a4, %lo(.LCPI47_2)(a4) -; RV64-NEXT: lui a5, %hi(.LCPI47_3) -; RV64-NEXT: ld a5, %lo(.LCPI47_3)(a5) -; RV64-NEXT: vsrl.vi v24, v16, 4 -; RV64-NEXT: vadd.vv v16, v16, v24 -; RV64-NEXT: vand.vx v16, v16, a4 -; RV64-NEXT: vmul.vx v16, v16, a5 -; RV64-NEXT: li a6, 56 -; RV64-NEXT: vsrl.vx v16, v16, a6 +; RV64-NEXT: mv a2, a0 ; RV64-NEXT: bltu a0, a1, .LBB47_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a1 +; RV64-NEXT: mv a2, a1 ; RV64-NEXT: .LBB47_2: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v8, 1 +; RV64-NEXT: lui a2, 349525 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: add a2, a2, a3 ; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vsub.vv v8, v8, v24 +; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: vand.vx v24, v8, a3 ; RV64-NEXT: vsrl.vi v8, v8, 2 ; RV64-NEXT: vand.vx v8, v8, a3 ; RV64-NEXT: vadd.vv v8, v24, v8 ; RV64-NEXT: vsrl.vi v24, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v24 +; RV64-NEXT: lui a4, 61681 +; RV64-NEXT: addiw a4, a4, -241 +; RV64-NEXT: slli a5, a4, 32 +; 
RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vand.vx v8, v8, a4 +; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: vmul.vx v8, v8, a5 +; RV64-NEXT: li a6, 56 ; RV64-NEXT: vsrl.vx v8, v8, a6 +; RV64-NEXT: sub a1, a0, a1 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v24, v16, 1 +; RV64-NEXT: vand.vx v24, v24, a2 +; RV64-NEXT: vsub.vv v16, v16, v24 +; RV64-NEXT: vand.vx v24, v16, a3 +; RV64-NEXT: vsrl.vi v16, v16, 2 +; RV64-NEXT: vand.vx v16, v16, a3 +; RV64-NEXT: vadd.vv v16, v24, v16 +; RV64-NEXT: vsrl.vi v24, v16, 4 +; RV64-NEXT: vadd.vv v16, v16, v24 +; RV64-NEXT: vand.vx v16, v16, a4 +; RV64-NEXT: vmul.vx v16, v16, a5 +; RV64-NEXT: vsrl.vx v16, v16, a6 ; RV64-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll @@ -1407,25 +1407,33 @@ ; RV64I-NEXT: vsub.vx v9, v8, a0 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v9 -; RV64I-NEXT: lui a0, %hi(.LCPI18_0) -; RV64I-NEXT: ld a0, %lo(.LCPI18_0)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI18_1) -; RV64I-NEXT: ld a1, %lo(.LCPI18_1)(a1) ; RV64I-NEXT: vsrl.vi v9, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v9, v9, a0 ; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: vand.vx v9, v8, a1 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vand.vx v9, v8, a0 ; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vand.vx v8, v8, a0 ; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI18_2) -; RV64I-NEXT: ld a0, %lo(.LCPI18_2)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI18_3) -; RV64I-NEXT: ld a1, %lo(.LCPI18_3)(a1) ; RV64I-NEXT: vsrl.vi v9, v8, 4 ; RV64I-NEXT: vadd.vv v8, v8, v9 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vmul.vx v8, v8, a0 ; RV64I-NEXT: li a0, 56 ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret @@ -1566,25 +1574,33 @@ ; RV64I-NEXT: vsub.vx v10, v8, a0 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v10 -; RV64I-NEXT: lui a0, %hi(.LCPI19_0) -; RV64I-NEXT: ld a0, %lo(.LCPI19_0)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI19_1) -; RV64I-NEXT: ld a1, %lo(.LCPI19_1)(a1) ; RV64I-NEXT: vsrl.vi v10, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v10, v10, a0 ; RV64I-NEXT: vsub.vv v8, v8, v10 -; RV64I-NEXT: vand.vx v10, v8, a1 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vand.vx v10, v8, a0 ; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vand.vx v8, v8, a0 ; RV64I-NEXT: vadd.vv v8, v10, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI19_2) -; RV64I-NEXT: ld a0, %lo(.LCPI19_2)(a0) -; 
RV64I-NEXT: lui a1, %hi(.LCPI19_3) -; RV64I-NEXT: ld a1, %lo(.LCPI19_3)(a1) ; RV64I-NEXT: vsrl.vi v10, v8, 4 ; RV64I-NEXT: vadd.vv v8, v8, v10 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vmul.vx v8, v8, a0 ; RV64I-NEXT: li a0, 56 ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret @@ -1725,25 +1741,33 @@ ; RV64I-NEXT: vsub.vx v12, v8, a0 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v12 -; RV64I-NEXT: lui a0, %hi(.LCPI20_0) -; RV64I-NEXT: ld a0, %lo(.LCPI20_0)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI20_1) -; RV64I-NEXT: ld a1, %lo(.LCPI20_1)(a1) ; RV64I-NEXT: vsrl.vi v12, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v12, v12, a0 ; RV64I-NEXT: vsub.vv v8, v8, v12 -; RV64I-NEXT: vand.vx v12, v8, a1 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vand.vx v12, v8, a0 ; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vand.vx v8, v8, a0 ; RV64I-NEXT: vadd.vv v8, v12, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI20_2) -; RV64I-NEXT: ld a0, %lo(.LCPI20_2)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI20_3) -; RV64I-NEXT: ld a1, %lo(.LCPI20_3)(a1) ; RV64I-NEXT: vsrl.vi v12, v8, 4 ; RV64I-NEXT: vadd.vv v8, v8, v12 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vmul.vx v8, v8, a0 ; RV64I-NEXT: li a0, 56 ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret @@ -1884,25 +1908,33 @@ ; RV64I-NEXT: vsub.vx v16, v8, a0 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v16 -; RV64I-NEXT: lui a0, %hi(.LCPI21_0) -; RV64I-NEXT: ld a0, %lo(.LCPI21_0)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI21_1) -; RV64I-NEXT: ld a1, %lo(.LCPI21_1)(a1) ; RV64I-NEXT: vsrl.vi v16, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v16, v16, a0 ; RV64I-NEXT: vsub.vv v8, v8, v16 -; RV64I-NEXT: vand.vx v16, v8, a1 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vand.vx v16, v8, a0 ; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vand.vx v8, v8, a0 ; RV64I-NEXT: vadd.vv v8, v16, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI21_2) -; RV64I-NEXT: ld a0, %lo(.LCPI21_2)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI21_3) -; RV64I-NEXT: ld a1, %lo(.LCPI21_3)(a1) ; RV64I-NEXT: vsrl.vi v16, v8, 4 ; RV64I-NEXT: vadd.vv v8, v8, v16 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vmul.vx v8, v8, a0 ; RV64I-NEXT: li a0, 56 ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret @@ -3292,25 +3324,33 @@ ; 
RV64I-NEXT: vsub.vx v9, v8, a0 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v9 -; RV64I-NEXT: lui a0, %hi(.LCPI40_0) -; RV64I-NEXT: ld a0, %lo(.LCPI40_0)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI40_1) -; RV64I-NEXT: ld a1, %lo(.LCPI40_1)(a1) ; RV64I-NEXT: vsrl.vi v9, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v9, v9, a0 ; RV64I-NEXT: vsub.vv v8, v8, v9 -; RV64I-NEXT: vand.vx v9, v8, a1 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vand.vx v9, v8, a0 ; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vand.vx v8, v8, a0 ; RV64I-NEXT: vadd.vv v8, v9, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI40_2) -; RV64I-NEXT: ld a0, %lo(.LCPI40_2)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI40_3) -; RV64I-NEXT: ld a1, %lo(.LCPI40_3)(a1) ; RV64I-NEXT: vsrl.vi v9, v8, 4 ; RV64I-NEXT: vadd.vv v8, v8, v9 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vmul.vx v8, v8, a0 ; RV64I-NEXT: li a0, 56 ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret @@ -3404,25 +3444,33 @@ ; RV64I-NEXT: vsub.vx v10, v8, a0 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v10 -; RV64I-NEXT: lui a0, %hi(.LCPI41_0) -; RV64I-NEXT: ld a0, %lo(.LCPI41_0)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI41_1) -; RV64I-NEXT: ld a1, %lo(.LCPI41_1)(a1) ; RV64I-NEXT: vsrl.vi v10, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v10, v10, a0 ; RV64I-NEXT: vsub.vv v8, v8, v10 -; RV64I-NEXT: vand.vx v10, v8, a1 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vand.vx v10, v8, a0 ; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vand.vx v8, v8, a0 ; RV64I-NEXT: vadd.vv v8, v10, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI41_2) -; RV64I-NEXT: ld a0, %lo(.LCPI41_2)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI41_3) -; RV64I-NEXT: ld a1, %lo(.LCPI41_3)(a1) ; RV64I-NEXT: vsrl.vi v10, v8, 4 ; RV64I-NEXT: vadd.vv v8, v8, v10 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vmul.vx v8, v8, a0 ; RV64I-NEXT: li a0, 56 ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret @@ -3516,25 +3564,33 @@ ; RV64I-NEXT: vsub.vx v12, v8, a0 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v12 -; RV64I-NEXT: lui a0, %hi(.LCPI42_0) -; RV64I-NEXT: ld a0, %lo(.LCPI42_0)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI42_1) -; RV64I-NEXT: ld a1, %lo(.LCPI42_1)(a1) ; RV64I-NEXT: vsrl.vi v12, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v12, v12, a0 ; RV64I-NEXT: vsub.vv v8, v8, v12 -; RV64I-NEXT: vand.vx v12, v8, a1 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: slli a1, a0, 
32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vand.vx v12, v8, a0 ; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vand.vx v8, v8, a0 ; RV64I-NEXT: vadd.vv v8, v12, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI42_2) -; RV64I-NEXT: ld a0, %lo(.LCPI42_2)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI42_3) -; RV64I-NEXT: ld a1, %lo(.LCPI42_3)(a1) ; RV64I-NEXT: vsrl.vi v12, v8, 4 ; RV64I-NEXT: vadd.vv v8, v8, v12 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vmul.vx v8, v8, a0 ; RV64I-NEXT: li a0, 56 ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret @@ -3628,25 +3684,33 @@ ; RV64I-NEXT: vsub.vx v16, v8, a0 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v16 -; RV64I-NEXT: lui a0, %hi(.LCPI43_0) -; RV64I-NEXT: ld a0, %lo(.LCPI43_0)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI43_1) -; RV64I-NEXT: ld a1, %lo(.LCPI43_1)(a1) ; RV64I-NEXT: vsrl.vi v16, v8, 1 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v16, v16, a0 ; RV64I-NEXT: vsub.vv v8, v8, v16 -; RV64I-NEXT: vand.vx v16, v8, a1 +; RV64I-NEXT: lui a0, 209715 +; RV64I-NEXT: addiw a0, a0, 819 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vand.vx v16, v8, a0 ; RV64I-NEXT: vsrl.vi v8, v8, 2 -; RV64I-NEXT: vand.vx v8, v8, a1 +; RV64I-NEXT: vand.vx v8, v8, a0 ; RV64I-NEXT: vadd.vv v8, v16, v8 -; RV64I-NEXT: lui a0, %hi(.LCPI43_2) -; RV64I-NEXT: ld a0, %lo(.LCPI43_2)(a0) -; RV64I-NEXT: lui a1, %hi(.LCPI43_3) -; RV64I-NEXT: ld a1, %lo(.LCPI43_3)(a1) ; RV64I-NEXT: vsrl.vi v16, v8, 4 ; RV64I-NEXT: vadd.vv v8, v8, v16 +; RV64I-NEXT: lui a0, 61681 +; RV64I-NEXT: addiw a0, a0, -241 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: vand.vx v8, v8, a0 -; RV64I-NEXT: vmul.vx v8, v8, a1 +; RV64I-NEXT: lui a0, 4112 +; RV64I-NEXT: addiw a0, a0, 257 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: vmul.vx v8, v8, a0 ; RV64I-NEXT: li a0, 56 ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll @@ -1830,25 +1830,33 @@ ; RV64-NEXT: vsub.vx v9, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI36_0) -; RV64-NEXT: ld a0, %lo(.LCPI36_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI36_1) -; RV64-NEXT: ld a1, %lo(.LCPI36_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: vand.vx v9, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI36_2) -; RV64-NEXT: ld a0, %lo(.LCPI36_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI36_3) -; RV64-NEXT: ld a1, %lo(.LCPI36_3)(a1) ; RV64-NEXT: 
vsrl.vi v9, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -1918,25 +1926,33 @@ ; RV64-NEXT: vsub.vx v9, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: lui a0, %hi(.LCPI37_0) -; RV64-NEXT: ld a0, %lo(.LCPI37_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI37_1) -; RV64-NEXT: ld a1, %lo(.LCPI37_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: lui a0, %hi(.LCPI37_2) -; RV64-NEXT: ld a0, %lo(.LCPI37_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI37_3) -; RV64-NEXT: ld a1, %lo(.LCPI37_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2010,25 +2026,33 @@ ; RV64-NEXT: vsub.vx v10, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI38_0) -; RV64-NEXT: ld a0, %lo(.LCPI38_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI38_1) -; RV64-NEXT: ld a1, %lo(.LCPI38_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI38_2) -; RV64-NEXT: ld a0, %lo(.LCPI38_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI38_3) -; RV64-NEXT: ld a1, %lo(.LCPI38_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -2098,25 +2122,33 @@ ; RV64-NEXT: vsub.vx v10, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; 
RV64-NEXT: vand.vv v8, v8, v10 -; RV64-NEXT: lui a0, %hi(.LCPI39_0) -; RV64-NEXT: ld a0, %lo(.LCPI39_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI39_1) -; RV64-NEXT: ld a1, %lo(.LCPI39_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: lui a0, %hi(.LCPI39_2) -; RV64-NEXT: ld a0, %lo(.LCPI39_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI39_3) -; RV64-NEXT: ld a1, %lo(.LCPI39_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2190,25 +2222,33 @@ ; RV64-NEXT: vsub.vx v12, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI40_0) -; RV64-NEXT: ld a0, %lo(.LCPI40_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI40_1) -; RV64-NEXT: ld a1, %lo(.LCPI40_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: vand.vx v12, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI40_2) -; RV64-NEXT: ld a0, %lo(.LCPI40_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI40_3) -; RV64-NEXT: ld a1, %lo(.LCPI40_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -2278,25 +2318,33 @@ ; RV64-NEXT: vsub.vx v12, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v12 -; RV64-NEXT: lui a0, %hi(.LCPI41_0) -; RV64-NEXT: ld a0, %lo(.LCPI41_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI41_1) -; RV64-NEXT: ld a1, %lo(.LCPI41_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: 
vand.vx v12, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: lui a0, %hi(.LCPI41_2) -; RV64-NEXT: ld a0, %lo(.LCPI41_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI41_3) -; RV64-NEXT: ld a1, %lo(.LCPI41_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2370,25 +2418,33 @@ ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI42_0) -; RV64-NEXT: ld a0, %lo(.LCPI42_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI42_1) -; RV64-NEXT: ld a1, %lo(.LCPI42_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI42_2) -; RV64-NEXT: ld a0, %lo(.LCPI42_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI42_3) -; RV64-NEXT: ld a1, %lo(.LCPI42_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -2458,25 +2514,33 @@ ; RV64-NEXT: vsub.vx v16, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: lui a0, %hi(.LCPI43_0) -; RV64-NEXT: ld a0, %lo(.LCPI43_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI43_1) -; RV64-NEXT: ld a1, %lo(.LCPI43_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI43_2) -; RV64-NEXT: ld a0, %lo(.LCPI43_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI43_3) -; RV64-NEXT: ld a1, %lo(.LCPI43_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui 
a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2550,25 +2614,33 @@ ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI44_0) -; RV64-NEXT: ld a0, %lo(.LCPI44_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI44_1) -; RV64-NEXT: ld a1, %lo(.LCPI44_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI44_2) -; RV64-NEXT: ld a0, %lo(.LCPI44_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI44_3) -; RV64-NEXT: ld a1, %lo(.LCPI44_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -2638,25 +2710,33 @@ ; RV64-NEXT: vsub.vx v16, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: lui a0, %hi(.LCPI45_0) -; RV64-NEXT: ld a0, %lo(.LCPI45_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI45_1) -; RV64-NEXT: ld a1, %lo(.LCPI45_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI45_2) -; RV64-NEXT: ld a0, %lo(.LCPI45_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI45_3) -; RV64-NEXT: ld a1, %lo(.LCPI45_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2895,24 +2975,32 @@ ; RV64-NEXT: vsub.vx v8, v16, a2, v0.t ; RV64-NEXT: vnot.v v16, v16, v0.t ; RV64-NEXT: vand.vv v16, v16, v8, v0.t -; RV64-NEXT: lui a3, %hi(.LCPI46_0) -; RV64-NEXT: ld a4, %lo(.LCPI46_0)(a3) -; RV64-NEXT: lui a3, %hi(.LCPI46_1) -; RV64-NEXT: ld a3, %lo(.LCPI46_1)(a3) ; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV64-NEXT: vand.vx v8, v8, a4, v0.t 
-; RV64-NEXT: vsub.vv v8, v16, v8, v0.t -; RV64-NEXT: vand.vx v16, v8, a3, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV64-NEXT: lui a3, 349525 +; RV64-NEXT: addiw a3, a3, 1365 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vadd.vv v16, v16, v8, v0.t -; RV64-NEXT: lui a5, %hi(.LCPI46_2) -; RV64-NEXT: ld a5, %lo(.LCPI46_2)(a5) -; RV64-NEXT: lui a6, %hi(.LCPI46_3) -; RV64-NEXT: ld a6, %lo(.LCPI46_3)(a6) -; RV64-NEXT: vsrl.vi v8, v16, 4, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t +; RV64-NEXT: vsub.vv v16, v16, v8, v0.t +; RV64-NEXT: lui a4, 209715 +; RV64-NEXT: addiw a4, a4, 819 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 +; RV64-NEXT: vand.vx v8, v16, a4, v0.t +; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV64-NEXT: vand.vx v16, v16, a4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a5, 61681 +; RV64-NEXT: addiw a5, a5, -241 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: vand.vx v8, v8, a5, v0.t +; RV64-NEXT: lui a6, 4112 +; RV64-NEXT: addiw a6, a6, 257 +; RV64-NEXT: slli a7, a6, 32 +; RV64-NEXT: add a6, a6, a7 ; RV64-NEXT: vmul.vx v8, v8, a6, v0.t ; RV64-NEXT: li a7, 56 ; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t @@ -2933,11 +3021,11 @@ ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a4, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a4, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t @@ -3090,24 +3178,32 @@ ; RV64-NEXT: vsub.vx v24, v16, a2 ; RV64-NEXT: vnot.v v16, v16 ; RV64-NEXT: vand.vv v16, v16, v24 -; RV64-NEXT: lui a3, %hi(.LCPI47_0) -; RV64-NEXT: ld a3, %lo(.LCPI47_0)(a3) -; RV64-NEXT: lui a4, %hi(.LCPI47_1) -; RV64-NEXT: ld a4, %lo(.LCPI47_1)(a4) ; RV64-NEXT: vsrl.vi v24, v16, 1 +; RV64-NEXT: lui a3, 349525 +; RV64-NEXT: addiw a3, a3, 1365 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: vand.vx v24, v24, a3 ; RV64-NEXT: vsub.vv v16, v16, v24 +; RV64-NEXT: lui a4, 209715 +; RV64-NEXT: addiw a4, a4, 819 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vand.vx v24, v16, a4 ; RV64-NEXT: vsrl.vi v16, v16, 2 ; RV64-NEXT: vand.vx v16, v16, a4 ; RV64-NEXT: vadd.vv v16, v24, v16 -; RV64-NEXT: lui a5, %hi(.LCPI47_2) -; RV64-NEXT: ld a5, %lo(.LCPI47_2)(a5) -; RV64-NEXT: lui a6, %hi(.LCPI47_3) -; RV64-NEXT: ld a6, %lo(.LCPI47_3)(a6) ; RV64-NEXT: vsrl.vi v24, v16, 4 ; RV64-NEXT: vadd.vv v16, v16, v24 +; RV64-NEXT: lui a5, 61681 +; RV64-NEXT: addiw a5, a5, -241 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: vand.vx v16, v16, a5 +; RV64-NEXT: lui a6, 4112 +; RV64-NEXT: addiw a6, a6, 257 +; RV64-NEXT: slli a7, a6, 32 +; RV64-NEXT: add a6, a6, a7 ; RV64-NEXT: vmul.vx v16, v16, a6 ; RV64-NEXT: li a7, 56 ; RV64-NEXT: vsrl.vx v16, v16, a7 @@ -4944,25 +5040,33 @@ ; RV64-NEXT: vsub.vx v9, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI84_0) -; RV64-NEXT: ld a0, %lo(.LCPI84_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI84_1) -; RV64-NEXT: ld a1, %lo(.LCPI84_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1, 
v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: vand.vx v9, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI84_2) -; RV64-NEXT: ld a0, %lo(.LCPI84_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI84_3) -; RV64-NEXT: ld a1, %lo(.LCPI84_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -5032,25 +5136,33 @@ ; RV64-NEXT: vsub.vx v9, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: lui a0, %hi(.LCPI85_0) -; RV64-NEXT: ld a0, %lo(.LCPI85_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI85_1) -; RV64-NEXT: ld a1, %lo(.LCPI85_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: lui a0, %hi(.LCPI85_2) -; RV64-NEXT: ld a0, %lo(.LCPI85_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI85_3) -; RV64-NEXT: ld a1, %lo(.LCPI85_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -5123,25 +5235,33 @@ ; RV64-NEXT: vsub.vx v10, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI86_0) -; RV64-NEXT: ld a0, %lo(.LCPI86_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI86_1) -; RV64-NEXT: ld a1, %lo(.LCPI86_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: lui a0, 
%hi(.LCPI86_2) -; RV64-NEXT: ld a0, %lo(.LCPI86_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI86_3) -; RV64-NEXT: ld a1, %lo(.LCPI86_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -5211,25 +5331,33 @@ ; RV64-NEXT: vsub.vx v10, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v10 -; RV64-NEXT: lui a0, %hi(.LCPI87_0) -; RV64-NEXT: ld a0, %lo(.LCPI87_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI87_1) -; RV64-NEXT: ld a1, %lo(.LCPI87_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: lui a0, %hi(.LCPI87_2) -; RV64-NEXT: ld a0, %lo(.LCPI87_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI87_3) -; RV64-NEXT: ld a1, %lo(.LCPI87_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -5302,25 +5430,33 @@ ; RV64-NEXT: vsub.vx v12, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI88_0) -; RV64-NEXT: ld a0, %lo(.LCPI88_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI88_1) -; RV64-NEXT: ld a1, %lo(.LCPI88_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: vand.vx v12, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI88_2) -; RV64-NEXT: ld a0, %lo(.LCPI88_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI88_3) -; RV64-NEXT: ld a1, %lo(.LCPI88_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; 
RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -5390,25 +5526,33 @@ ; RV64-NEXT: vsub.vx v12, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v12 -; RV64-NEXT: lui a0, %hi(.LCPI89_0) -; RV64-NEXT: ld a0, %lo(.LCPI89_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI89_1) -; RV64-NEXT: ld a1, %lo(.LCPI89_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: lui a0, %hi(.LCPI89_2) -; RV64-NEXT: ld a0, %lo(.LCPI89_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI89_3) -; RV64-NEXT: ld a1, %lo(.LCPI89_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -5481,25 +5625,33 @@ ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI90_0) -; RV64-NEXT: ld a0, %lo(.LCPI90_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI90_1) -; RV64-NEXT: ld a1, %lo(.LCPI90_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI90_2) -; RV64-NEXT: ld a0, %lo(.LCPI90_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI90_3) -; RV64-NEXT: ld a1, %lo(.LCPI90_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -5569,25 +5721,33 @@ ; RV64-NEXT: vsub.vx v16, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: lui a0, %hi(.LCPI91_0) -; RV64-NEXT: ld a0, %lo(.LCPI91_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI91_1) -; RV64-NEXT: ld a1, %lo(.LCPI91_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; 
RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI91_2) -; RV64-NEXT: ld a0, %lo(.LCPI91_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI91_3) -; RV64-NEXT: ld a1, %lo(.LCPI91_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -5660,25 +5820,33 @@ ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI92_0) -; RV64-NEXT: ld a0, %lo(.LCPI92_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI92_1) -; RV64-NEXT: ld a1, %lo(.LCPI92_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI92_2) -; RV64-NEXT: ld a0, %lo(.LCPI92_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI92_3) -; RV64-NEXT: ld a1, %lo(.LCPI92_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -5748,25 +5916,33 @@ ; RV64-NEXT: vsub.vx v16, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: lui a0, %hi(.LCPI93_0) -; RV64-NEXT: ld a0, %lo(.LCPI93_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI93_1) -; RV64-NEXT: ld a1, %lo(.LCPI93_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI93_2) -; RV64-NEXT: ld a0, %lo(.LCPI93_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI93_3) -; RV64-NEXT: ld a1, %lo(.LCPI93_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; 
RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -6003,24 +6179,32 @@ ; RV64-NEXT: vsub.vx v8, v16, a2, v0.t ; RV64-NEXT: vnot.v v16, v16, v0.t ; RV64-NEXT: vand.vv v16, v16, v8, v0.t -; RV64-NEXT: lui a3, %hi(.LCPI94_0) -; RV64-NEXT: ld a4, %lo(.LCPI94_0)(a3) -; RV64-NEXT: lui a3, %hi(.LCPI94_1) -; RV64-NEXT: ld a3, %lo(.LCPI94_1)(a3) ; RV64-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV64-NEXT: vand.vx v8, v8, a4, v0.t -; RV64-NEXT: vsub.vv v8, v16, v8, v0.t -; RV64-NEXT: vand.vx v16, v8, a3, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV64-NEXT: lui a3, 349525 +; RV64-NEXT: addiw a3, a3, 1365 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vadd.vv v16, v16, v8, v0.t -; RV64-NEXT: lui a5, %hi(.LCPI94_2) -; RV64-NEXT: ld a5, %lo(.LCPI94_2)(a5) -; RV64-NEXT: lui a6, %hi(.LCPI94_3) -; RV64-NEXT: ld a6, %lo(.LCPI94_3)(a6) -; RV64-NEXT: vsrl.vi v8, v16, 4, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t +; RV64-NEXT: vsub.vv v16, v16, v8, v0.t +; RV64-NEXT: lui a4, 209715 +; RV64-NEXT: addiw a4, a4, 819 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 +; RV64-NEXT: vand.vx v8, v16, a4, v0.t +; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV64-NEXT: vand.vx v16, v16, a4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a5, 61681 +; RV64-NEXT: addiw a5, a5, -241 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: vand.vx v8, v8, a5, v0.t +; RV64-NEXT: lui a6, 4112 +; RV64-NEXT: addiw a6, a6, 257 +; RV64-NEXT: slli a7, a6, 32 +; RV64-NEXT: add a6, a6, a7 ; RV64-NEXT: vmul.vx v8, v8, a6, v0.t ; RV64-NEXT: li a7, 56 ; RV64-NEXT: vsrl.vx v8, v8, a7, v0.t @@ -6041,11 +6225,11 @@ ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a4, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a4, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t @@ -6198,24 +6382,32 @@ ; RV64-NEXT: vsub.vx v24, v16, a2 ; RV64-NEXT: vnot.v v16, v16 ; RV64-NEXT: vand.vv v16, v16, v24 -; RV64-NEXT: lui a3, %hi(.LCPI95_0) -; RV64-NEXT: ld a3, %lo(.LCPI95_0)(a3) -; RV64-NEXT: lui a4, %hi(.LCPI95_1) -; RV64-NEXT: ld a4, %lo(.LCPI95_1)(a4) ; RV64-NEXT: vsrl.vi v24, v16, 1 +; RV64-NEXT: lui a3, 349525 +; RV64-NEXT: addiw a3, a3, 1365 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: vand.vx v24, v24, a3 ; RV64-NEXT: vsub.vv v16, v16, v24 +; RV64-NEXT: lui a4, 209715 +; RV64-NEXT: addiw a4, a4, 819 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vand.vx v24, v16, a4 ; RV64-NEXT: vsrl.vi v16, v16, 2 ; RV64-NEXT: vand.vx v16, v16, a4 ; RV64-NEXT: vadd.vv v16, v24, v16 -; RV64-NEXT: lui a5, %hi(.LCPI95_2) -; RV64-NEXT: ld a5, %lo(.LCPI95_2)(a5) -; RV64-NEXT: lui a6, %hi(.LCPI95_3) -; RV64-NEXT: ld a6, %lo(.LCPI95_3)(a6) ; 
RV64-NEXT: vsrl.vi v24, v16, 4 ; RV64-NEXT: vadd.vv v16, v16, v24 +; RV64-NEXT: lui a5, 61681 +; RV64-NEXT: addiw a5, a5, -241 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: vand.vx v16, v16, a5 +; RV64-NEXT: lui a6, 4112 +; RV64-NEXT: addiw a6, a6, 257 +; RV64-NEXT: slli a7, a6, 32 +; RV64-NEXT: add a6, a6, a7 ; RV64-NEXT: vmul.vx v16, v16, a6 ; RV64-NEXT: li a7, 56 ; RV64-NEXT: vsrl.vx v16, v16, a7 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -1482,25 +1482,31 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v11, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI24_0) -; RV64-NEXT: ld a0, %lo(.LCPI24_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v10, v0.t ; RV64-NEXT: vor.vv v8, v9, v8, v0.t ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI24_1) -; RV64-NEXT: ld a0, %lo(.LCPI24_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4, v0.t ; RV64-NEXT: vor.vv v8, v9, v8, v0.t ; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI24_2) -; RV64-NEXT: ld a0, %lo(.LCPI24_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2, v0.t ; RV64-NEXT: vor.vv v8, v9, v8, v0.t ; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t @@ -1608,25 +1614,31 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v11 -; RV64-NEXT: lui a0, %hi(.LCPI25_0) -; RV64-NEXT: ld a0, %lo(.LCPI25_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 4 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI25_1) -; RV64-NEXT: ld a0, %lo(.LCPI25_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 2 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI25_2) -; RV64-NEXT: ld a0, %lo(.LCPI25_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -1740,25 +1752,31 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v14, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI26_0) -; RV64-NEXT: ld a0, %lo(.LCPI26_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v12, v0.t ; RV64-NEXT: vor.vv v8, v10, v8, v0.t ; RV64-NEXT: vsrl.vi v10, v8, 
4, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI26_1) -; RV64-NEXT: ld a0, %lo(.LCPI26_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4, v0.t ; RV64-NEXT: vor.vv v8, v10, v8, v0.t ; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI26_2) -; RV64-NEXT: ld a0, %lo(.LCPI26_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2, v0.t ; RV64-NEXT: vor.vv v8, v10, v8, v0.t ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t @@ -1866,25 +1884,31 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v14 -; RV64-NEXT: lui a0, %hi(.LCPI27_0) -; RV64-NEXT: ld a0, %lo(.LCPI27_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: vsrl.vi v10, v8, 4 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI27_1) -; RV64-NEXT: ld a0, %lo(.LCPI27_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: vsrl.vi v10, v8, 2 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI27_2) -; RV64-NEXT: ld a0, %lo(.LCPI27_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -1999,25 +2023,31 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v20, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI28_0) -; RV64-NEXT: ld a0, %lo(.LCPI28_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vor.vv v8, v12, v8, v0.t ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI28_1) -; RV64-NEXT: ld a0, %lo(.LCPI28_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4, v0.t ; RV64-NEXT: vor.vv v8, v12, v8, v0.t ; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI28_2) -; RV64-NEXT: ld a0, %lo(.LCPI28_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2, v0.t ; RV64-NEXT: vor.vv v8, v12, v8, v0.t ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; 
RV64-NEXT: vsll.vi v8, v8, 1, v0.t @@ -2126,25 +2156,31 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v20 -; RV64-NEXT: lui a0, %hi(.LCPI29_0) -; RV64-NEXT: ld a0, %lo(.LCPI29_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vor.vv v8, v12, v8 ; RV64-NEXT: vsrl.vi v12, v8, 4 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI29_1) -; RV64-NEXT: ld a0, %lo(.LCPI29_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v12, v8 ; RV64-NEXT: vsrl.vi v12, v8, 2 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI29_2) -; RV64-NEXT: ld a0, %lo(.LCPI29_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v12, v8 ; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -2342,27 +2378,33 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI30_0) -; RV64-NEXT: ld a0, %lo(.LCPI30_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v24, v0.t -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vor.vv v16, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v8, v16, 4, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vor.vv v8, v16, v8, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI30_1) -; RV64-NEXT: ld a0, %lo(.LCPI30_1)(a0) -; RV64-NEXT: vsll.vi v16, v16, 4, v0.t -; RV64-NEXT: vor.vv v16, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v8, v16, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vsll.vi v8, v8, 4, v0.t +; RV64-NEXT: vor.vv v8, v16, v8, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI30_2) -; RV64-NEXT: ld a0, %lo(.LCPI30_2)(a0) -; RV64-NEXT: vsll.vi v16, v16, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vsll.vi v8, v8, 2, v0.t +; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t @@ -2492,25 +2534,31 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v0 -; RV64-NEXT: lui a0, %hi(.LCPI31_0) -; RV64-NEXT: ld a0, %lo(.LCPI31_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 4 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; 
RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI31_1) -; RV64-NEXT: ld a0, %lo(.LCPI31_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 2 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI31_2) -; RV64-NEXT: ld a0, %lo(.LCPI31_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -2708,27 +2756,33 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI32_0) -; RV64-NEXT: ld a0, %lo(.LCPI32_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v24, v0.t -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vor.vv v16, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v8, v16, 4, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vor.vv v8, v16, v8, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI32_1) -; RV64-NEXT: ld a0, %lo(.LCPI32_1)(a0) -; RV64-NEXT: vsll.vi v16, v16, 4, v0.t -; RV64-NEXT: vor.vv v16, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v8, v16, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vsll.vi v8, v8, 4, v0.t +; RV64-NEXT: vor.vv v8, v16, v8, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI32_2) -; RV64-NEXT: ld a0, %lo(.LCPI32_2)(a0) -; RV64-NEXT: vsll.vi v16, v16, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vsll.vi v8, v8, 2, v0.t +; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t @@ -2858,25 +2912,31 @@ ; RV64-NEXT: vsrl.vi v8, v8, 8 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v0 -; RV64-NEXT: lui a0, %hi(.LCPI33_0) -; RV64-NEXT: ld a0, %lo(.LCPI33_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 4 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI33_1) -; RV64-NEXT: ld a0, %lo(.LCPI33_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 2 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, %hi(.LCPI33_2) -; RV64-NEXT: ld a0, %lo(.LCPI33_2)(a0) ; RV64-NEXT: 
vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -261,25 +261,31 @@ ; RV64-NEXT: vand.vx v8, v8, a3 ; RV64-NEXT: vsll.vx v8, v8, a2 ; RV64-NEXT: vor.vv v8, v11, v8 -; RV64-NEXT: lui a1, %hi(.LCPI2_0) -; RV64-NEXT: ld a1, %lo(.LCPI2_0)(a1) ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vsrl.vi v9, v8, 4 +; RV64-NEXT: lui a1, 61681 +; RV64-NEXT: addiw a1, a1, -241 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 ; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: lui a1, %hi(.LCPI2_1) -; RV64-NEXT: ld a1, %lo(.LCPI2_1)(a1) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 2 +; RV64-NEXT: lui a1, 209715 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 ; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: lui a1, %hi(.LCPI2_2) -; RV64-NEXT: ld a1, %lo(.LCPI2_2)(a1) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 ; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -787,25 +793,31 @@ ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a3 ; LMULMAX2-RV64-NEXT: vsll.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: vor.vv v8, v14, v8 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI5_0) -; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI5_0)(a1) ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-RV64-NEXT: lui a1, 61681 +; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64-NEXT: add a1, a1, a2 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI5_1) -; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI5_1)(a1) ; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 4 ; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 2 +; LMULMAX2-RV64-NEXT: lui a1, 209715 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64-NEXT: add a1, a1, a2 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI5_2) -; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI5_2)(a1) ; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-RV64-NEXT: lui a1, 349525 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64-NEXT: add a1, a1, a2 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v8 @@ -954,25 +966,31 @@ ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 ; LMULMAX1-RV64-NEXT: vsll.vx v9, v9, a3 ; LMULMAX1-RV64-NEXT: vor.vv v9, v12, v9 -; LMULMAX1-RV64-NEXT: lui a7, %hi(.LCPI5_0) -; LMULMAX1-RV64-NEXT: ld a7, %lo(.LCPI5_0)(a7) ; 
LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 +; LMULMAX1-RV64-NEXT: lui a7, 61681 +; LMULMAX1-RV64-NEXT: addiw a7, a7, -241 +; LMULMAX1-RV64-NEXT: slli t0, a7, 32 +; LMULMAX1-RV64-NEXT: add a7, a7, t0 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a7 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a7 -; LMULMAX1-RV64-NEXT: lui t0, %hi(.LCPI5_1) -; LMULMAX1-RV64-NEXT: ld t0, %lo(.LCPI5_1)(t0) ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 4 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2 +; LMULMAX1-RV64-NEXT: lui t0, 209715 +; LMULMAX1-RV64-NEXT: addiw t0, t0, 819 +; LMULMAX1-RV64-NEXT: slli t1, t0, 32 +; LMULMAX1-RV64-NEXT: add t0, t0, t1 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t0 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, t0 -; LMULMAX1-RV64-NEXT: lui t1, %hi(.LCPI5_2) -; LMULMAX1-RV64-NEXT: ld t1, %lo(.LCPI5_2)(t1) ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 2 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 +; LMULMAX1-RV64-NEXT: lui t1, 349525 +; LMULMAX1-RV64-NEXT: addiw t1, t1, 1365 +; LMULMAX1-RV64-NEXT: slli t2, t1, 32 +; LMULMAX1-RV64-NEXT: add t1, t1, t2 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t1 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, t1 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll @@ -1519,25 +1519,33 @@ ; RV64-NEXT: vsrl.vx v9, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v9, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI24_0) -; RV64-NEXT: ld a0, %lo(.LCPI24_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI24_1) -; RV64-NEXT: ld a1, %lo(.LCPI24_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: vand.vx v9, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI24_2) -; RV64-NEXT: ld a0, %lo(.LCPI24_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI24_3) -; RV64-NEXT: ld a1, %lo(.LCPI24_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -1618,25 +1626,33 @@ ; RV64-NEXT: vsrl.vx v9, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI25_0) -; RV64-NEXT: ld a0, %lo(.LCPI25_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI25_1) -; RV64-NEXT: ld a1, %lo(.LCPI25_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: 
vsub.vv v8, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: lui a0, %hi(.LCPI25_2) -; RV64-NEXT: ld a0, %lo(.LCPI25_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI25_3) -; RV64-NEXT: ld a1, %lo(.LCPI25_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1721,25 +1737,33 @@ ; RV64-NEXT: vsrl.vx v10, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v10, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI26_0) -; RV64-NEXT: ld a0, %lo(.LCPI26_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI26_1) -; RV64-NEXT: ld a1, %lo(.LCPI26_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI26_2) -; RV64-NEXT: ld a0, %lo(.LCPI26_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI26_3) -; RV64-NEXT: ld a1, %lo(.LCPI26_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -1820,25 +1844,33 @@ ; RV64-NEXT: vsrl.vx v10, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI27_0) -; RV64-NEXT: ld a0, %lo(.LCPI27_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI27_1) -; RV64-NEXT: ld a1, %lo(.LCPI27_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: lui a0, %hi(.LCPI27_2) -; RV64-NEXT: ld a0, %lo(.LCPI27_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI27_3) -; RV64-NEXT: ld a1, %lo(.LCPI27_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: lui a0, 
61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1923,25 +1955,33 @@ ; RV64-NEXT: vsrl.vx v12, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v12, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI28_0) -; RV64-NEXT: ld a0, %lo(.LCPI28_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI28_1) -; RV64-NEXT: ld a1, %lo(.LCPI28_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: vand.vx v12, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI28_2) -; RV64-NEXT: ld a0, %lo(.LCPI28_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI28_3) -; RV64-NEXT: ld a1, %lo(.LCPI28_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -2022,25 +2062,33 @@ ; RV64-NEXT: vsrl.vx v12, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI29_0) -; RV64-NEXT: ld a0, %lo(.LCPI29_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI29_1) -; RV64-NEXT: ld a1, %lo(.LCPI29_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: lui a0, %hi(.LCPI29_2) -; RV64-NEXT: ld a0, %lo(.LCPI29_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI29_3) -; RV64-NEXT: ld a1, %lo(.LCPI29_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2125,25 +2173,33 @@ ; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI30_0) -; RV64-NEXT: ld 
a0, %lo(.LCPI30_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI30_1) -; RV64-NEXT: ld a1, %lo(.LCPI30_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI30_2) -; RV64-NEXT: ld a0, %lo(.LCPI30_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI30_3) -; RV64-NEXT: ld a1, %lo(.LCPI30_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -2224,25 +2280,33 @@ ; RV64-NEXT: vsrl.vx v16, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI31_0) -; RV64-NEXT: ld a0, %lo(.LCPI31_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI31_1) -; RV64-NEXT: ld a1, %lo(.LCPI31_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI31_2) -; RV64-NEXT: ld a0, %lo(.LCPI31_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI31_3) -; RV64-NEXT: ld a1, %lo(.LCPI31_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2327,25 +2391,33 @@ ; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI32_0) -; RV64-NEXT: ld a0, %lo(.LCPI32_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI32_1) -; RV64-NEXT: ld a1, %lo(.LCPI32_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 
-; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI32_2) -; RV64-NEXT: ld a0, %lo(.LCPI32_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI32_3) -; RV64-NEXT: ld a1, %lo(.LCPI32_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -2426,25 +2498,33 @@ ; RV64-NEXT: vsrl.vx v16, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI33_0) -; RV64-NEXT: ld a0, %lo(.LCPI33_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI33_1) -; RV64-NEXT: ld a1, %lo(.LCPI33_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI33_2) -; RV64-NEXT: ld a0, %lo(.LCPI33_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI33_3) -; RV64-NEXT: ld a1, %lo(.LCPI33_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2759,24 +2839,32 @@ ; RV64-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a2, %hi(.LCPI34_0) -; RV64-NEXT: ld a3, %lo(.LCPI34_0)(a2) -; RV64-NEXT: lui a2, %hi(.LCPI34_1) -; RV64-NEXT: ld a2, %lo(.LCPI34_1)(a2) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: lui a2, 349525 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: vand.vx v16, v16, a2, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a2, v0.t +; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a4, %hi(.LCPI34_2) -; RV64-NEXT: ld a4, %lo(.LCPI34_2)(a4) -; RV64-NEXT: lui a5, %hi(.LCPI34_3) -; RV64-NEXT: ld a5, %lo(.LCPI34_3)(a5) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a4, 61681 +; RV64-NEXT: addiw a4, a4, -241 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vand.vx v8, v8, a4, v0.t +; RV64-NEXT: lui a5, 4112 
+; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: vmul.vx v8, v8, a5, v0.t ; RV64-NEXT: li a6, 56 ; RV64-NEXT: vsrl.vx v8, v8, a6, v0.t @@ -2807,11 +2895,11 @@ ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vand.vx v16, v16, a2, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a2, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t @@ -3003,24 +3091,32 @@ ; RV64-NEXT: vsrl.vx v24, v8, a1 ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a2, %hi(.LCPI35_0) -; RV64-NEXT: ld a2, %lo(.LCPI35_0)(a2) -; RV64-NEXT: lui a3, %hi(.LCPI35_1) -; RV64-NEXT: ld a3, %lo(.LCPI35_1)(a3) ; RV64-NEXT: vsrl.vi v24, v8, 1 +; RV64-NEXT: lui a2, 349525 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: add a2, a2, a3 ; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vsub.vv v8, v8, v24 +; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: vand.vx v24, v8, a3 ; RV64-NEXT: vsrl.vi v8, v8, 2 ; RV64-NEXT: vand.vx v8, v8, a3 ; RV64-NEXT: vadd.vv v8, v24, v8 -; RV64-NEXT: lui a4, %hi(.LCPI35_2) -; RV64-NEXT: ld a4, %lo(.LCPI35_2)(a4) -; RV64-NEXT: lui a5, %hi(.LCPI35_3) -; RV64-NEXT: ld a5, %lo(.LCPI35_3)(a5) ; RV64-NEXT: vsrl.vi v24, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v24 +; RV64-NEXT: lui a4, 61681 +; RV64-NEXT: addiw a4, a4, -241 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vand.vx v8, v8, a4 +; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: vmul.vx v8, v8, a5 ; RV64-NEXT: li a6, 56 ; RV64-NEXT: vsrl.vx v8, v8, a6 @@ -4550,25 +4646,33 @@ ; RV64-NEXT: vsrl.vx v9, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v9, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI60_0) -; RV64-NEXT: ld a0, %lo(.LCPI60_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI60_1) -; RV64-NEXT: ld a1, %lo(.LCPI60_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: vand.vx v9, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI60_2) -; RV64-NEXT: ld a0, %lo(.LCPI60_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI60_3) -; RV64-NEXT: ld a1, %lo(.LCPI60_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; 
RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -4649,25 +4753,33 @@ ; RV64-NEXT: vsrl.vx v9, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI61_0) -; RV64-NEXT: ld a0, %lo(.LCPI61_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI61_1) -; RV64-NEXT: ld a1, %lo(.LCPI61_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: lui a0, %hi(.LCPI61_2) -; RV64-NEXT: ld a0, %lo(.LCPI61_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI61_3) -; RV64-NEXT: ld a1, %lo(.LCPI61_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -4750,25 +4862,33 @@ ; RV64-NEXT: vsrl.vx v10, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v10, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI62_0) -; RV64-NEXT: ld a0, %lo(.LCPI62_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI62_1) -; RV64-NEXT: ld a1, %lo(.LCPI62_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI62_2) -; RV64-NEXT: ld a0, %lo(.LCPI62_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI62_3) -; RV64-NEXT: ld a1, %lo(.LCPI62_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -4849,25 +4969,33 @@ ; RV64-NEXT: vsrl.vx v10, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI63_0) -; RV64-NEXT: ld a0, %lo(.LCPI63_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI63_1) -; RV64-NEXT: ld a1, %lo(.LCPI63_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a1 +; 
RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: lui a0, %hi(.LCPI63_2) -; RV64-NEXT: ld a0, %lo(.LCPI63_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI63_3) -; RV64-NEXT: ld a1, %lo(.LCPI63_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -4950,25 +5078,33 @@ ; RV64-NEXT: vsrl.vx v12, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v12, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI64_0) -; RV64-NEXT: ld a0, %lo(.LCPI64_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI64_1) -; RV64-NEXT: ld a1, %lo(.LCPI64_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: vand.vx v12, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI64_2) -; RV64-NEXT: ld a0, %lo(.LCPI64_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI64_3) -; RV64-NEXT: ld a1, %lo(.LCPI64_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -5049,25 +5185,33 @@ ; RV64-NEXT: vsrl.vx v12, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI65_0) -; RV64-NEXT: ld a0, %lo(.LCPI65_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI65_1) -; RV64-NEXT: ld a1, %lo(.LCPI65_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: lui a0, %hi(.LCPI65_2) -; RV64-NEXT: ld a0, %lo(.LCPI65_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI65_3) -; RV64-NEXT: ld a1, %lo(.LCPI65_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: 
slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -5150,25 +5294,33 @@ ; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI66_0) -; RV64-NEXT: ld a0, %lo(.LCPI66_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI66_1) -; RV64-NEXT: ld a1, %lo(.LCPI66_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI66_2) -; RV64-NEXT: ld a0, %lo(.LCPI66_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI66_3) -; RV64-NEXT: ld a1, %lo(.LCPI66_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -5249,25 +5401,33 @@ ; RV64-NEXT: vsrl.vx v16, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI67_0) -; RV64-NEXT: ld a0, %lo(.LCPI67_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI67_1) -; RV64-NEXT: ld a1, %lo(.LCPI67_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI67_2) -; RV64-NEXT: ld a0, %lo(.LCPI67_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI67_3) -; RV64-NEXT: ld a1, %lo(.LCPI67_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -5350,25 +5510,33 @@ ; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI68_0) -; RV64-NEXT: ld a0, %lo(.LCPI68_0)(a0) -; RV64-NEXT: lui a1, 
%hi(.LCPI68_1) -; RV64-NEXT: ld a1, %lo(.LCPI68_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI68_2) -; RV64-NEXT: ld a0, %lo(.LCPI68_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI68_3) -; RV64-NEXT: ld a1, %lo(.LCPI68_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -5449,25 +5617,33 @@ ; RV64-NEXT: vsrl.vx v16, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a0, %hi(.LCPI69_0) -; RV64-NEXT: ld a0, %lo(.LCPI69_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI69_1) -; RV64-NEXT: ld a1, %lo(.LCPI69_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI69_2) -; RV64-NEXT: ld a0, %lo(.LCPI69_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI69_3) -; RV64-NEXT: ld a1, %lo(.LCPI69_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -5780,24 +5956,32 @@ ; RV64-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: lui a2, %hi(.LCPI70_0) -; RV64-NEXT: ld a3, %lo(.LCPI70_0)(a2) -; RV64-NEXT: lui a2, %hi(.LCPI70_1) -; RV64-NEXT: ld a2, %lo(.LCPI70_1)(a2) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: lui a2, 349525 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: vand.vx v16, v16, a2, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a2, v0.t +; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; 
RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a4, %hi(.LCPI70_2) -; RV64-NEXT: ld a4, %lo(.LCPI70_2)(a4) -; RV64-NEXT: lui a5, %hi(.LCPI70_3) -; RV64-NEXT: ld a5, %lo(.LCPI70_3)(a5) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a4, 61681 +; RV64-NEXT: addiw a4, a4, -241 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vand.vx v8, v8, a4, v0.t +; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: vmul.vx v8, v8, a5, v0.t ; RV64-NEXT: li a6, 56 ; RV64-NEXT: vsrl.vx v8, v8, a6, v0.t @@ -5828,11 +6012,11 @@ ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vand.vx v16, v16, a2, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a2, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t @@ -6024,24 +6208,32 @@ ; RV64-NEXT: vsrl.vx v24, v8, a1 ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vnot.v v8, v8 -; RV64-NEXT: lui a2, %hi(.LCPI71_0) -; RV64-NEXT: ld a2, %lo(.LCPI71_0)(a2) -; RV64-NEXT: lui a3, %hi(.LCPI71_1) -; RV64-NEXT: ld a3, %lo(.LCPI71_1)(a3) ; RV64-NEXT: vsrl.vi v24, v8, 1 +; RV64-NEXT: lui a2, 349525 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: add a2, a2, a3 ; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vsub.vv v8, v8, v24 +; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: vand.vx v24, v8, a3 ; RV64-NEXT: vsrl.vi v8, v8, 2 ; RV64-NEXT: vand.vx v8, v8, a3 ; RV64-NEXT: vadd.vv v8, v24, v8 -; RV64-NEXT: lui a4, %hi(.LCPI71_2) -; RV64-NEXT: ld a4, %lo(.LCPI71_2)(a4) -; RV64-NEXT: lui a5, %hi(.LCPI71_3) -; RV64-NEXT: ld a5, %lo(.LCPI71_3)(a5) ; RV64-NEXT: vsrl.vi v24, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v24 +; RV64-NEXT: lui a4, 61681 +; RV64-NEXT: addiw a4, a4, -241 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vand.vx v8, v8, a4 +; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: vmul.vx v8, v8, a5 ; RV64-NEXT: li a6, 56 ; RV64-NEXT: vsrl.vx v8, v8, a6 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -509,25 +509,33 @@ ; LMULMAX2-RV64I-NEXT: vsrl.vx v9, v8, a1 ; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v9 ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: lui a1, %hi(.LCPI3_0) -; LMULMAX2-RV64I-NEXT: ld a1, %lo(.LCPI3_0)(a1) -; LMULMAX2-RV64I-NEXT: lui a2, %hi(.LCPI3_1) -; LMULMAX2-RV64I-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 +; LMULMAX2-RV64I-NEXT: lui a1, 349525 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64I-NEXT: add a1, a1, a2 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a2 +; LMULMAX2-RV64I-NEXT: lui a1, 209715 +; LMULMAX2-RV64I-NEXT: 
addiw a1, a1, 819 +; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64I-NEXT: add a1, a1, a2 +; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a2 +; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64I-NEXT: lui a1, %hi(.LCPI3_2) -; LMULMAX2-RV64I-NEXT: ld a1, %lo(.LCPI3_2)(a1) -; LMULMAX2-RV64I-NEXT: lui a2, %hi(.LCPI3_3) -; LMULMAX2-RV64I-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 +; LMULMAX2-RV64I-NEXT: lui a1, 61681 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64I-NEXT: add a1, a1, a2 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a2 +; LMULMAX2-RV64I-NEXT: lui a1, 4112 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 +; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64I-NEXT: add a1, a1, a2 +; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: li a1, 56 ; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0) @@ -1174,25 +1182,33 @@ ; LMULMAX2-RV64I-NEXT: vsrl.vx v10, v8, a1 ; LMULMAX2-RV64I-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV64I-NEXT: lui a1, %hi(.LCPI7_0) -; LMULMAX2-RV64I-NEXT: ld a1, %lo(.LCPI7_0)(a1) -; LMULMAX2-RV64I-NEXT: lui a2, %hi(.LCPI7_1) -; LMULMAX2-RV64I-NEXT: ld a2, %lo(.LCPI7_1)(a2) ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-RV64I-NEXT: lui a1, 349525 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64I-NEXT: add a1, a1, a2 ; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a2 +; LMULMAX2-RV64I-NEXT: lui a1, 209715 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64I-NEXT: add a1, a1, a2 +; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a2 +; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64I-NEXT: lui a1, %hi(.LCPI7_2) -; LMULMAX2-RV64I-NEXT: ld a1, %lo(.LCPI7_2)(a1) -; LMULMAX2-RV64I-NEXT: lui a2, %hi(.LCPI7_3) -; LMULMAX2-RV64I-NEXT: ld a2, %lo(.LCPI7_3)(a2) ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-RV64I-NEXT: lui a1, 61681 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64I-NEXT: add a1, a1, a2 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a2 +; LMULMAX2-RV64I-NEXT: lui a1, 4112 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 +; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64I-NEXT: add a1, a1, a2 +; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: li a1, 56 ; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll @@ -1112,25 +1112,33 @@ ; RV64-LABEL: vp_ctpop_v2i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI24_0) -; RV64-NEXT: ld a0, %lo(.LCPI24_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI24_1) -; RV64-NEXT: ld a1, %lo(.LCPI24_1)(a1) ; RV64-NEXT: 
vsrl.vi v9, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: vand.vx v9, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI24_2) -; RV64-NEXT: ld a0, %lo(.LCPI24_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI24_3) -; RV64-NEXT: ld a1, %lo(.LCPI24_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -1180,25 +1188,33 @@ ; RV64-LABEL: vp_ctpop_v2i64_unmasked: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI25_0) -; RV64-NEXT: ld a0, %lo(.LCPI25_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI25_1) -; RV64-NEXT: ld a1, %lo(.LCPI25_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: lui a0, %hi(.LCPI25_2) -; RV64-NEXT: ld a0, %lo(.LCPI25_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI25_3) -; RV64-NEXT: ld a1, %lo(.LCPI25_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1252,25 +1268,33 @@ ; RV64-LABEL: vp_ctpop_v4i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI26_0) -; RV64-NEXT: ld a0, %lo(.LCPI26_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI26_1) -; RV64-NEXT: ld a1, %lo(.LCPI26_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: lui 
a0, %hi(.LCPI26_2) -; RV64-NEXT: ld a0, %lo(.LCPI26_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI26_3) -; RV64-NEXT: ld a1, %lo(.LCPI26_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -1320,25 +1344,33 @@ ; RV64-LABEL: vp_ctpop_v4i64_unmasked: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI27_0) -; RV64-NEXT: ld a0, %lo(.LCPI27_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI27_1) -; RV64-NEXT: ld a1, %lo(.LCPI27_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: lui a0, %hi(.LCPI27_2) -; RV64-NEXT: ld a0, %lo(.LCPI27_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI27_3) -; RV64-NEXT: ld a1, %lo(.LCPI27_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1392,25 +1424,33 @@ ; RV64-LABEL: vp_ctpop_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI28_0) -; RV64-NEXT: ld a0, %lo(.LCPI28_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI28_1) -; RV64-NEXT: ld a1, %lo(.LCPI28_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: vand.vx v12, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI28_2) -; RV64-NEXT: ld a0, %lo(.LCPI28_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI28_3) -; RV64-NEXT: ld a1, %lo(.LCPI28_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: 
li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -1460,25 +1500,33 @@ ; RV64-LABEL: vp_ctpop_v8i64_unmasked: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI29_0) -; RV64-NEXT: ld a0, %lo(.LCPI29_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI29_1) -; RV64-NEXT: ld a1, %lo(.LCPI29_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: lui a0, %hi(.LCPI29_2) -; RV64-NEXT: ld a0, %lo(.LCPI29_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI29_3) -; RV64-NEXT: ld a1, %lo(.LCPI29_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1533,25 +1581,33 @@ ; RV64-LABEL: vp_ctpop_v15i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI30_0) -; RV64-NEXT: ld a0, %lo(.LCPI30_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI30_1) -; RV64-NEXT: ld a1, %lo(.LCPI30_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI30_2) -; RV64-NEXT: ld a0, %lo(.LCPI30_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI30_3) -; RV64-NEXT: ld a1, %lo(.LCPI30_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -1602,25 +1658,33 @@ ; RV64-LABEL: vp_ctpop_v15i64_unmasked: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI31_0) -; RV64-NEXT: ld a0, %lo(.LCPI31_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI31_1) -; RV64-NEXT: ld a1, %lo(.LCPI31_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: 
vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI31_2) -; RV64-NEXT: ld a0, %lo(.LCPI31_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI31_3) -; RV64-NEXT: ld a1, %lo(.LCPI31_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1675,25 +1739,33 @@ ; RV64-LABEL: vp_ctpop_v16i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI32_0) -; RV64-NEXT: ld a0, %lo(.LCPI32_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI32_1) -; RV64-NEXT: ld a1, %lo(.LCPI32_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI32_2) -; RV64-NEXT: ld a0, %lo(.LCPI32_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI32_3) -; RV64-NEXT: ld a1, %lo(.LCPI32_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -1744,25 +1816,33 @@ ; RV64-LABEL: vp_ctpop_v16i64_unmasked: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI33_0) -; RV64-NEXT: ld a0, %lo(.LCPI33_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI33_1) -; RV64-NEXT: ld a1, %lo(.LCPI33_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI33_2) -; RV64-NEXT: ld a0, %lo(.LCPI33_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI33_3) -; RV64-NEXT: ld a1, %lo(.LCPI33_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, 
-241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2005,24 +2085,32 @@ ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB34_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: lui a1, %hi(.LCPI34_0) -; RV64-NEXT: ld a1, %lo(.LCPI34_0)(a1) -; RV64-NEXT: lui a2, %hi(.LCPI34_1) -; RV64-NEXT: ld a2, %lo(.LCPI34_1)(a2) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 ; RV64-NEXT: vand.vx v16, v16, a1, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: add a2, a2, a3 ; RV64-NEXT: vand.vx v16, v8, a2, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a2, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a3, %hi(.LCPI34_2) -; RV64-NEXT: ld a3, %lo(.LCPI34_2)(a3) -; RV64-NEXT: lui a4, %hi(.LCPI34_3) -; RV64-NEXT: ld a4, %lo(.LCPI34_3)(a4) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: vand.vx v8, v8, a3, v0.t +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: li a5, 56 ; RV64-NEXT: vsrl.vx v8, v8, a5, v0.t @@ -2192,24 +2280,32 @@ ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB35_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: lui a1, %hi(.LCPI35_0) -; RV64-NEXT: ld a1, %lo(.LCPI35_0)(a1) -; RV64-NEXT: lui a2, %hi(.LCPI35_1) -; RV64-NEXT: ld a2, %lo(.LCPI35_1)(a2) ; RV64-NEXT: vsrl.vi v24, v8, 1 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 ; RV64-NEXT: vand.vx v24, v24, a1 ; RV64-NEXT: vsub.vv v8, v8, v24 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: add a2, a2, a3 ; RV64-NEXT: vand.vx v24, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 ; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v24, v8 -; RV64-NEXT: lui a3, %hi(.LCPI35_2) -; RV64-NEXT: ld a3, %lo(.LCPI35_2)(a3) -; RV64-NEXT: lui a4, %hi(.LCPI35_3) -; RV64-NEXT: ld a4, %lo(.LCPI35_3)(a4) ; RV64-NEXT: vsrl.vi v24, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v24 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a5, 56 ; RV64-NEXT: vsrl.vx v8, v8, a5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll @@ -304,25 +304,33 @@ ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI3_0) -; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) -; LMULMAX2-RV64-NEXT: lui a2, 
%hi(.LCPI3_1) -; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 +; LMULMAX2-RV64-NEXT: lui a1, 349525 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64-NEXT: add a1, a1, a2 ; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a2 +; LMULMAX2-RV64-NEXT: lui a1, 209715 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64-NEXT: add a1, a1, a2 +; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a1 ; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a2 +; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI3_2) -; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) -; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI3_3) -; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v9 +; LMULMAX2-RV64-NEXT: lui a1, 61681 +; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64-NEXT: add a1, a1, a2 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a2 +; LMULMAX2-RV64-NEXT: lui a1, 4112 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 +; LMULMAX2-RV64-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64-NEXT: add a1, a1, a2 +; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: li a1, 56 ; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) @@ -372,25 +380,33 @@ ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI3_0) -; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI3_1) -; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 +; LMULMAX1-RV64-NEXT: lui a1, 349525 +; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 +; LMULMAX1-RV64-NEXT: slli a2, a1, 32 +; LMULMAX1-RV64-NEXT: add a1, a1, a2 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a2 +; LMULMAX1-RV64-NEXT: lui a1, 209715 +; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 +; LMULMAX1-RV64-NEXT: slli a2, a1, 32 +; LMULMAX1-RV64-NEXT: add a1, a1, a2 +; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1 ; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI3_2) -; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI3_3) -; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9 +; LMULMAX1-RV64-NEXT: lui a1, 61681 +; LMULMAX1-RV64-NEXT: addiw a1, a1, -241 +; LMULMAX1-RV64-NEXT: slli a2, a1, 32 +; LMULMAX1-RV64-NEXT: add a1, a1, a2 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 +; LMULMAX1-RV64-NEXT: lui a1, 4112 +; LMULMAX1-RV64-NEXT: addiw a1, a1, 257 +; LMULMAX1-RV64-NEXT: slli a2, a1, 32 +; LMULMAX1-RV64-NEXT: add a1, a1, a2 +; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1 ; LMULMAX1-RV64-NEXT: li a1, 56 ; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) @@ -796,25 +812,33 @@ ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: 
lui a1, %hi(.LCPI7_0) -; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI7_0)(a1) -; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI7_1) -; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI7_1)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-RV64-NEXT: lui a1, 349525 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64-NEXT: add a1, a1, a2 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a2 +; LMULMAX2-RV64-NEXT: lui a1, 209715 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64-NEXT: add a1, a1, a2 +; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1 ; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a2 +; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI7_2) -; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI7_2)(a1) -; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI7_3) -; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-RV64-NEXT: lui a1, 61681 +; LMULMAX2-RV64-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64-NEXT: add a1, a1, a2 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a2 +; LMULMAX2-RV64-NEXT: lui a1, 4112 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 +; LMULMAX2-RV64-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64-NEXT: add a1, a1, a2 +; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: li a1, 56 ; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) @@ -878,44 +902,52 @@ ; LMULMAX1-RV64-LABEL: ctpop_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI7_0) -; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI7_0)(a2) -; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI7_1) -; LMULMAX1-RV64-NEXT: ld a3, %lo(.LCPI7_1)(a3) -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: lui a4, %hi(.LCPI7_2) -; LMULMAX1-RV64-NEXT: ld a4, %lo(.LCPI7_2)(a4) -; LMULMAX1-RV64-NEXT: lui a5, %hi(.LCPI7_3) -; LMULMAX1-RV64-NEXT: ld a5, %lo(.LCPI7_3)(a5) -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: li a6, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a6 +; LMULMAX1-RV64-NEXT: vle64.v v8, (a1) +; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX1-RV64-NEXT: lui a2, 349525 +; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365 +; LMULMAX1-RV64-NEXT: slli a3, a2, 32 +; LMULMAX1-RV64-NEXT: add a2, a2, a3 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 +; LMULMAX1-RV64-NEXT: lui a3, 209715 +; LMULMAX1-RV64-NEXT: addiw a3, a3, 819 +; LMULMAX1-RV64-NEXT: slli a4, a3, 32 +; LMULMAX1-RV64-NEXT: add a3, a3, a4 ; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a3 ; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: 
vsrl.vi v10, v8, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 +; LMULMAX1-RV64-NEXT: lui a4, 61681 +; LMULMAX1-RV64-NEXT: addiw a4, a4, -241 +; LMULMAX1-RV64-NEXT: slli a5, a4, 32 +; LMULMAX1-RV64-NEXT: add a4, a4, a5 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 +; LMULMAX1-RV64-NEXT: lui a5, 4112 +; LMULMAX1-RV64-NEXT: addiw a5, a5, 257 +; LMULMAX1-RV64-NEXT: slli a6, a5, 32 +; LMULMAX1-RV64-NEXT: add a5, a5, a6 ; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a5 +; LMULMAX1-RV64-NEXT: li a6, 56 ; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a6 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a1) +; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 +; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10 +; LMULMAX1-RV64-NEXT: vand.vx v10, v9, a3 +; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3 +; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 +; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 +; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 +; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a5 +; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a6 +; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -1283,25 +1283,33 @@ ; RV64-NEXT: vsub.vx v9, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI24_0) -; RV64-NEXT: ld a0, %lo(.LCPI24_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI24_1) -; RV64-NEXT: ld a1, %lo(.LCPI24_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: vand.vx v9, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI24_2) -; RV64-NEXT: ld a0, %lo(.LCPI24_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI24_3) -; RV64-NEXT: ld a1, %lo(.LCPI24_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -1362,25 +1370,33 @@ ; RV64-NEXT: vsub.vx v9, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: lui a0, %hi(.LCPI25_0) -; RV64-NEXT: ld a0, %lo(.LCPI25_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI25_1) -; RV64-NEXT: ld a1, %lo(.LCPI25_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: 
vand.vx v9, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: lui a0, %hi(.LCPI25_2) -; RV64-NEXT: ld a0, %lo(.LCPI25_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI25_3) -; RV64-NEXT: ld a1, %lo(.LCPI25_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1445,25 +1461,33 @@ ; RV64-NEXT: vsub.vx v10, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI26_0) -; RV64-NEXT: ld a0, %lo(.LCPI26_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI26_1) -; RV64-NEXT: ld a1, %lo(.LCPI26_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI26_2) -; RV64-NEXT: ld a0, %lo(.LCPI26_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI26_3) -; RV64-NEXT: ld a1, %lo(.LCPI26_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -1524,25 +1548,33 @@ ; RV64-NEXT: vsub.vx v10, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v10 -; RV64-NEXT: lui a0, %hi(.LCPI27_0) -; RV64-NEXT: ld a0, %lo(.LCPI27_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI27_1) -; RV64-NEXT: ld a1, %lo(.LCPI27_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: lui a0, %hi(.LCPI27_2) -; RV64-NEXT: ld a0, %lo(.LCPI27_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI27_3) -; RV64-NEXT: ld a1, %lo(.LCPI27_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, 
-241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1607,25 +1639,33 @@ ; RV64-NEXT: vsub.vx v12, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI28_0) -; RV64-NEXT: ld a0, %lo(.LCPI28_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI28_1) -; RV64-NEXT: ld a1, %lo(.LCPI28_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: vand.vx v12, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI28_2) -; RV64-NEXT: ld a0, %lo(.LCPI28_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI28_3) -; RV64-NEXT: ld a1, %lo(.LCPI28_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -1686,25 +1726,33 @@ ; RV64-NEXT: vsub.vx v12, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v12 -; RV64-NEXT: lui a0, %hi(.LCPI29_0) -; RV64-NEXT: ld a0, %lo(.LCPI29_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI29_1) -; RV64-NEXT: ld a1, %lo(.LCPI29_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: lui a0, %hi(.LCPI29_2) -; RV64-NEXT: ld a0, %lo(.LCPI29_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI29_3) -; RV64-NEXT: ld a1, %lo(.LCPI29_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1770,25 +1818,33 @@ ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI30_0) -; RV64-NEXT: ld a0, %lo(.LCPI30_0)(a0) -; 
RV64-NEXT: lui a1, %hi(.LCPI30_1) -; RV64-NEXT: ld a1, %lo(.LCPI30_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI30_2) -; RV64-NEXT: ld a0, %lo(.LCPI30_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI30_3) -; RV64-NEXT: ld a1, %lo(.LCPI30_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -1850,25 +1906,33 @@ ; RV64-NEXT: vsub.vx v16, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: lui a0, %hi(.LCPI31_0) -; RV64-NEXT: ld a0, %lo(.LCPI31_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI31_1) -; RV64-NEXT: ld a1, %lo(.LCPI31_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI31_2) -; RV64-NEXT: ld a0, %lo(.LCPI31_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI31_3) -; RV64-NEXT: ld a1, %lo(.LCPI31_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1934,25 +1998,33 @@ ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI32_0) -; RV64-NEXT: ld a0, %lo(.LCPI32_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI32_1) -; RV64-NEXT: ld a1, %lo(.LCPI32_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, 
v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI32_2) -; RV64-NEXT: ld a0, %lo(.LCPI32_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI32_3) -; RV64-NEXT: ld a1, %lo(.LCPI32_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -2014,25 +2086,33 @@ ; RV64-NEXT: vsub.vx v16, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: lui a0, %hi(.LCPI33_0) -; RV64-NEXT: ld a0, %lo(.LCPI33_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI33_1) -; RV64-NEXT: ld a1, %lo(.LCPI33_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI33_2) -; RV64-NEXT: ld a0, %lo(.LCPI33_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI33_3) -; RV64-NEXT: ld a1, %lo(.LCPI33_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2343,24 +2423,32 @@ ; RV64-NEXT: vsub.vx v16, v8, a2, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a1, %hi(.LCPI34_0) -; RV64-NEXT: ld a1, %lo(.LCPI34_0)(a1) -; RV64-NEXT: lui a3, %hi(.LCPI34_1) -; RV64-NEXT: ld a3, %lo(.LCPI34_1)(a3) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: slli a3, a1, 32 +; RV64-NEXT: add a1, a1, a3 ; RV64-NEXT: vand.vx v16, v16, a1, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a4, %hi(.LCPI34_2) -; RV64-NEXT: ld a4, %lo(.LCPI34_2)(a4) -; RV64-NEXT: lui a5, %hi(.LCPI34_3) -; RV64-NEXT: ld a5, %lo(.LCPI34_3)(a5) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a4, 61681 +; RV64-NEXT: addiw a4, a4, -241 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vand.vx v8, v8, a4, v0.t +; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: vmul.vx v8, v8, a5, v0.t ; RV64-NEXT: li a6, 
56 ; RV64-NEXT: vsrl.vx v8, v8, a6, v0.t @@ -2548,24 +2636,32 @@ ; RV64-NEXT: vsub.vx v24, v8, a2 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v24 -; RV64-NEXT: lui a1, %hi(.LCPI35_0) -; RV64-NEXT: ld a1, %lo(.LCPI35_0)(a1) -; RV64-NEXT: lui a3, %hi(.LCPI35_1) -; RV64-NEXT: ld a3, %lo(.LCPI35_1)(a3) ; RV64-NEXT: vsrl.vi v24, v8, 1 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: slli a3, a1, 32 +; RV64-NEXT: add a1, a1, a3 ; RV64-NEXT: vand.vx v24, v24, a1 ; RV64-NEXT: vsub.vv v8, v8, v24 +; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: vand.vx v24, v8, a3 ; RV64-NEXT: vsrl.vi v8, v8, 2 ; RV64-NEXT: vand.vx v8, v8, a3 ; RV64-NEXT: vadd.vv v8, v24, v8 -; RV64-NEXT: lui a4, %hi(.LCPI35_2) -; RV64-NEXT: ld a4, %lo(.LCPI35_2)(a4) -; RV64-NEXT: lui a5, %hi(.LCPI35_3) -; RV64-NEXT: ld a5, %lo(.LCPI35_3)(a5) ; RV64-NEXT: vsrl.vi v24, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v24 +; RV64-NEXT: lui a4, 61681 +; RV64-NEXT: addiw a4, a4, -241 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vand.vx v8, v8, a4 +; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: vmul.vx v8, v8, a5 ; RV64-NEXT: li a6, 56 ; RV64-NEXT: vsrl.vx v8, v8, a6 @@ -3849,25 +3945,33 @@ ; RV64-NEXT: vsub.vx v9, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v9, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI60_0) -; RV64-NEXT: ld a0, %lo(.LCPI60_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI60_1) -; RV64-NEXT: ld a1, %lo(.LCPI60_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: vand.vx v9, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v9, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI60_2) -; RV64-NEXT: ld a0, %lo(.LCPI60_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI60_3) -; RV64-NEXT: ld a1, %lo(.LCPI60_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -3928,25 +4032,33 @@ ; RV64-NEXT: vsub.vx v9, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: lui a0, %hi(.LCPI61_0) -; RV64-NEXT: ld a0, %lo(.LCPI61_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI61_1) -; RV64-NEXT: ld a1, %lo(.LCPI61_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v9, 
v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v9, v8 -; RV64-NEXT: lui a0, %hi(.LCPI61_2) -; RV64-NEXT: ld a0, %lo(.LCPI61_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI61_3) -; RV64-NEXT: ld a1, %lo(.LCPI61_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -4009,25 +4121,33 @@ ; RV64-NEXT: vsub.vx v10, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v10, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI62_0) -; RV64-NEXT: ld a0, %lo(.LCPI62_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI62_1) -; RV64-NEXT: ld a1, %lo(.LCPI62_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v10, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI62_2) -; RV64-NEXT: ld a0, %lo(.LCPI62_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI62_3) -; RV64-NEXT: ld a1, %lo(.LCPI62_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -4088,25 +4208,33 @@ ; RV64-NEXT: vsub.vx v10, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v10 -; RV64-NEXT: lui a0, %hi(.LCPI63_0) -; RV64-NEXT: ld a0, %lo(.LCPI63_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI63_1) -; RV64-NEXT: ld a1, %lo(.LCPI63_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v10, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v10, v8 -; RV64-NEXT: lui a0, %hi(.LCPI63_2) -; RV64-NEXT: ld a0, %lo(.LCPI63_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI63_3) -; RV64-NEXT: ld a1, %lo(.LCPI63_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; 
RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -4169,25 +4297,33 @@ ; RV64-NEXT: vsub.vx v12, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v12, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI64_0) -; RV64-NEXT: ld a0, %lo(.LCPI64_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI64_1) -; RV64-NEXT: ld a1, %lo(.LCPI64_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: vand.vx v12, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v12, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI64_2) -; RV64-NEXT: ld a0, %lo(.LCPI64_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI64_3) -; RV64-NEXT: ld a1, %lo(.LCPI64_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -4248,25 +4384,33 @@ ; RV64-NEXT: vsub.vx v12, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v12 -; RV64-NEXT: lui a0, %hi(.LCPI65_0) -; RV64-NEXT: ld a0, %lo(.LCPI65_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI65_1) -; RV64-NEXT: ld a1, %lo(.LCPI65_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v12, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v12, v8 -; RV64-NEXT: lui a0, %hi(.LCPI65_2) -; RV64-NEXT: ld a0, %lo(.LCPI65_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI65_3) -; RV64-NEXT: ld a1, %lo(.LCPI65_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -4330,25 +4474,33 @@ ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI66_0) -; RV64-NEXT: ld a0, %lo(.LCPI66_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI66_1) -; RV64-NEXT: ld a1, %lo(.LCPI66_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, 
a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI66_2) -; RV64-NEXT: ld a0, %lo(.LCPI66_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI66_3) -; RV64-NEXT: ld a1, %lo(.LCPI66_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -4410,25 +4562,33 @@ ; RV64-NEXT: vsub.vx v16, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: lui a0, %hi(.LCPI67_0) -; RV64-NEXT: ld a0, %lo(.LCPI67_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI67_1) -; RV64-NEXT: ld a1, %lo(.LCPI67_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI67_2) -; RV64-NEXT: ld a0, %lo(.LCPI67_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI67_3) -; RV64-NEXT: ld a1, %lo(.LCPI67_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -4492,25 +4652,33 @@ ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI68_0) -; RV64-NEXT: ld a0, %lo(.LCPI68_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI68_1) -; RV64-NEXT: ld a1, %lo(.LCPI68_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a0, %hi(.LCPI68_2) -; RV64-NEXT: ld a0, 
%lo(.LCPI68_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI68_3) -; RV64-NEXT: ld a1, %lo(.LCPI68_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vmul.vx v8, v8, a1, v0.t +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0, v0.t ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret @@ -4572,25 +4740,33 @@ ; RV64-NEXT: vsub.vx v16, v8, a1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: lui a0, %hi(.LCPI69_0) -; RV64-NEXT: ld a0, %lo(.LCPI69_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI69_1) -; RV64-NEXT: ld a1, %lo(.LCPI69_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: lui a0, 349525 +; RV64-NEXT: addiw a0, a0, 1365 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: lui a0, 209715 +; RV64-NEXT: addiw a0, a0, 819 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vand.vx v16, v8, a0 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v16, v8 -; RV64-NEXT: lui a0, %hi(.LCPI69_2) -; RV64-NEXT: ld a0, %lo(.LCPI69_2)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI69_3) -; RV64-NEXT: ld a1, %lo(.LCPI69_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: lui a0, 4112 +; RV64-NEXT: addiw a0, a0, 257 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -4899,24 +5075,32 @@ ; RV64-NEXT: vsub.vx v16, v8, a2, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: lui a1, %hi(.LCPI70_0) -; RV64-NEXT: ld a1, %lo(.LCPI70_0)(a1) -; RV64-NEXT: lui a3, %hi(.LCPI70_1) -; RV64-NEXT: ld a3, %lo(.LCPI70_1)(a3) ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: slli a3, a1, 32 +; RV64-NEXT: add a1, a1, a3 ; RV64-NEXT: vand.vx v16, v16, a1, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: lui a4, %hi(.LCPI70_2) -; RV64-NEXT: ld a4, %lo(.LCPI70_2)(a4) -; RV64-NEXT: lui a5, %hi(.LCPI70_3) -; RV64-NEXT: ld a5, %lo(.LCPI70_3)(a5) ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: lui a4, 61681 +; RV64-NEXT: addiw a4, a4, -241 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vand.vx v8, v8, a4, v0.t +; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: vmul.vx v8, v8, a5, v0.t ; RV64-NEXT: li a6, 56 ; RV64-NEXT: vsrl.vx v8, v8, a6, v0.t @@ -5104,24 +5288,32 @@ ; RV64-NEXT: vsub.vx v24, v8, a2 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv 
v8, v8, v24 -; RV64-NEXT: lui a1, %hi(.LCPI71_0) -; RV64-NEXT: ld a1, %lo(.LCPI71_0)(a1) -; RV64-NEXT: lui a3, %hi(.LCPI71_1) -; RV64-NEXT: ld a3, %lo(.LCPI71_1)(a3) ; RV64-NEXT: vsrl.vi v24, v8, 1 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: slli a3, a1, 32 +; RV64-NEXT: add a1, a1, a3 ; RV64-NEXT: vand.vx v24, v24, a1 ; RV64-NEXT: vsub.vv v8, v8, v24 +; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a3, a3, a4 ; RV64-NEXT: vand.vx v24, v8, a3 ; RV64-NEXT: vsrl.vi v8, v8, 2 ; RV64-NEXT: vand.vx v8, v8, a3 ; RV64-NEXT: vadd.vv v8, v24, v8 -; RV64-NEXT: lui a4, %hi(.LCPI71_2) -; RV64-NEXT: ld a4, %lo(.LCPI71_2)(a4) -; RV64-NEXT: lui a5, %hi(.LCPI71_3) -; RV64-NEXT: ld a5, %lo(.LCPI71_3)(a5) ; RV64-NEXT: vsrl.vi v24, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v24 +; RV64-NEXT: lui a4, 61681 +; RV64-NEXT: addiw a4, a4, -241 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vand.vx v8, v8, a4 +; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: vmul.vx v8, v8, a5 ; RV64-NEXT: li a6, 56 ; RV64-NEXT: vsrl.vx v8, v8, a6 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -485,25 +485,33 @@ ; LMULMAX2-RV64I-NEXT: vsub.vx v9, v8, a1 ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 ; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: lui a1, %hi(.LCPI3_0) -; LMULMAX2-RV64I-NEXT: ld a1, %lo(.LCPI3_0)(a1) -; LMULMAX2-RV64I-NEXT: lui a2, %hi(.LCPI3_1) -; LMULMAX2-RV64I-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 1 +; LMULMAX2-RV64I-NEXT: lui a1, 349525 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64I-NEXT: add a1, a1, a2 ; LMULMAX2-RV64I-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a2 +; LMULMAX2-RV64I-NEXT: lui a1, 209715 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64I-NEXT: add a1, a1, a2 +; LMULMAX2-RV64I-NEXT: vand.vx v9, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a2 +; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV64I-NEXT: lui a1, %hi(.LCPI3_2) -; LMULMAX2-RV64I-NEXT: ld a1, %lo(.LCPI3_2)(a1) -; LMULMAX2-RV64I-NEXT: lui a2, %hi(.LCPI3_3) -; LMULMAX2-RV64I-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX2-RV64I-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v9 +; LMULMAX2-RV64I-NEXT: lui a1, 61681 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64I-NEXT: add a1, a1, a2 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a2 +; LMULMAX2-RV64I-NEXT: lui a1, 4112 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 +; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64I-NEXT: add a1, a1, a2 +; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: li a1, 56 ; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0) @@ -1141,25 +1149,33 @@ ; LMULMAX2-RV64I-NEXT: vsub.vx v10, v8, a1 ; LMULMAX2-RV64I-NEXT: vnot.v v8, v8 ; LMULMAX2-RV64I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: lui a1, %hi(.LCPI7_0) -; LMULMAX2-RV64I-NEXT: ld a1, %lo(.LCPI7_0)(a1) -; 
LMULMAX2-RV64I-NEXT: lui a2, %hi(.LCPI7_1) -; LMULMAX2-RV64I-NEXT: ld a2, %lo(.LCPI7_1)(a2) ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-RV64I-NEXT: lui a1, 349525 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64I-NEXT: add a1, a1, a2 ; LMULMAX2-RV64I-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a2 +; LMULMAX2-RV64I-NEXT: lui a1, 209715 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, 819 +; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64I-NEXT: add a1, a1, a2 +; LMULMAX2-RV64I-NEXT: vand.vx v10, v8, a1 ; LMULMAX2-RV64I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a2 +; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64I-NEXT: lui a1, %hi(.LCPI7_2) -; LMULMAX2-RV64I-NEXT: ld a1, %lo(.LCPI7_2)(a1) -; LMULMAX2-RV64I-NEXT: lui a2, %hi(.LCPI7_3) -; LMULMAX2-RV64I-NEXT: ld a2, %lo(.LCPI7_3)(a2) ; LMULMAX2-RV64I-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV64I-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-RV64I-NEXT: lui a1, 61681 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, -241 +; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64I-NEXT: add a1, a1, a2 ; LMULMAX2-RV64I-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a2 +; LMULMAX2-RV64I-NEXT: lui a1, 4112 +; LMULMAX2-RV64I-NEXT: addiw a1, a1, 257 +; LMULMAX2-RV64I-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64I-NEXT: add a1, a1, a2 +; LMULMAX2-RV64I-NEXT: vmul.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: li a1, 56 ; LMULMAX2-RV64I-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV64I-NEXT: vse64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1600,16 +1600,20 @@ ; RV64-LABEL: mulhu_v2i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: lui a1, %hi(.LCPI69_0) -; RV64-NEXT: addi a1, a1, %lo(.LCPI69_0) -; RV64-NEXT: vlse64.v v8, (a1), zero -; RV64-NEXT: lui a1, %hi(.LCPI69_1) -; RV64-NEXT: ld a1, %lo(.LCPI69_1)(a1) -; RV64-NEXT: vle64.v v9, (a0) +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: lui a1, 838861 +; RV64-NEXT: addiw a1, a1, -819 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: vmv.v.x v9, a1 +; RV64-NEXT: lui a1, 699051 +; RV64-NEXT: addiw a1, a1, -1365 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; RV64-NEXT: vmv.s.x v8, a1 +; RV64-NEXT: vmv.s.x v9, a1 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; RV64-NEXT: vmulhu.vv v8, v9, v8 +; RV64-NEXT: vmulhu.vv v8, v8, v9 ; RV64-NEXT: vid.v v9 ; RV64-NEXT: vadd.vi v9, v9, 1 ; RV64-NEXT: vsrl.vv v8, v8, v9 @@ -1851,19 +1855,21 @@ ; RV64-LABEL: mulhs_v2i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: lui a1, %hi(.LCPI74_0) -; RV64-NEXT: addi a1, a1, %lo(.LCPI74_0) -; RV64-NEXT: vlse64.v v8, (a1), zero -; RV64-NEXT: lui a1, %hi(.LCPI74_1) -; RV64-NEXT: ld a1, %lo(.LCPI74_1)(a1) -; RV64-NEXT: vle64.v v9, (a0) +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: lui a2, %hi(.LCPI74_0) +; RV64-NEXT: ld a2, %lo(.LCPI74_0)(a2) +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vmv.v.x v9, a1 ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; RV64-NEXT: vmv.s.x v8, a1 +; RV64-NEXT: vmv.s.x v9, a2 ; RV64-NEXT: vsetvli zero, zero, 
e64, m1, ta, ma -; RV64-NEXT: vmulh.vv v8, v9, v8 +; RV64-NEXT: vmulh.vv v9, v8, v9 ; RV64-NEXT: vid.v v10 ; RV64-NEXT: vrsub.vi v11, v10, 0 -; RV64-NEXT: vmadd.vv v11, v9, v8 +; RV64-NEXT: vmadd.vv v11, v8, v9 ; RV64-NEXT: li a1, 63 ; RV64-NEXT: vsrl.vx v8, v11, a1 ; RV64-NEXT: vsra.vv v9, v11, v10 @@ -5857,13 +5863,17 @@ ; LMULMAX1-RV64-NEXT: vmulhu.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vid.v v10 -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI184_2) -; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI184_2) -; LMULMAX1-RV64-NEXT: vlse64.v v11, (a2), zero -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI184_3) -; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI184_3)(a2) -; LMULMAX1-RV64-NEXT: vadd.vi v12, v10, 2 -; LMULMAX1-RV64-NEXT: vsrl.vv v9, v9, v12 +; LMULMAX1-RV64-NEXT: vadd.vi v11, v10, 2 +; LMULMAX1-RV64-NEXT: vsrl.vv v9, v9, v11 +; LMULMAX1-RV64-NEXT: lui a2, 838861 +; LMULMAX1-RV64-NEXT: addiw a2, a2, -819 +; LMULMAX1-RV64-NEXT: slli a3, a2, 32 +; LMULMAX1-RV64-NEXT: add a2, a2, a3 +; LMULMAX1-RV64-NEXT: vmv.v.x v11, a2 +; LMULMAX1-RV64-NEXT: lui a2, 699051 +; LMULMAX1-RV64-NEXT: addiw a2, a2, -1365 +; LMULMAX1-RV64-NEXT: slli a3, a2, 32 +; LMULMAX1-RV64-NEXT: add a2, a2, a3 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma @@ -6150,25 +6160,27 @@ ; LMULMAX2-RV64-LABEL: mulhs_v4i64: ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX2-RV64-NEXT: li a1, 5 ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI188_0) -; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI188_0) -; LMULMAX2-RV64-NEXT: vlse64.v v8, (a1), zero -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI188_1) -; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI188_1)(a1) -; LMULMAX2-RV64-NEXT: vle64.v v10, (a0) +; LMULMAX2-RV64-NEXT: lui a1, 349525 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI188_0) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI188_0)(a2) +; LMULMAX2-RV64-NEXT: slli a3, a1, 32 +; LMULMAX2-RV64-NEXT: add a1, a1, a3 +; LMULMAX2-RV64-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a2, v0 +; LMULMAX2-RV64-NEXT: vmulh.vv v10, v8, v10 ; LMULMAX2-RV64-NEXT: vmv.v.i v12, -1 ; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 0, v0 -; LMULMAX2-RV64-NEXT: vmerge.vxm v8, v8, a1, v0 -; LMULMAX2-RV64-NEXT: vmulh.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vmacc.vv v8, v10, v12 +; LMULMAX2-RV64-NEXT: vmadd.vv v12, v8, v10 ; LMULMAX2-RV64-NEXT: li a1, 63 -; LMULMAX2-RV64-NEXT: vsrl.vx v10, v8, a1 -; LMULMAX2-RV64-NEXT: vmv.v.i v12, 1 -; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 0, v0 -; LMULMAX2-RV64-NEXT: vsra.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-RV64-NEXT: vsrl.vx v8, v12, a1 +; LMULMAX2-RV64-NEXT: vmv.v.i v10, 1 +; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 0, v0 +; LMULMAX2-RV64-NEXT: vsra.vv v10, v12, v10 +; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; @@ -6193,31 +6205,33 @@ ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI188_0) -; LMULMAX1-RV64-NEXT: addi a1, a1, %lo(.LCPI188_0) -; LMULMAX1-RV64-NEXT: vlse64.v v9, (a1), zero -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI188_1) -; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI188_1)(a1) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v 
v10, (a2) +; LMULMAX1-RV64-NEXT: addi a1, a0, 16 +; LMULMAX1-RV64-NEXT: lui a2, 349525 +; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365 +; LMULMAX1-RV64-NEXT: slli a3, a2, 32 +; LMULMAX1-RV64-NEXT: add a2, a2, a3 +; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI188_0) +; LMULMAX1-RV64-NEXT: ld a3, %lo(.LCPI188_0)(a3) +; LMULMAX1-RV64-NEXT: vle64.v v9, (a1) +; LMULMAX1-RV64-NEXT: vmv.v.x v10, a2 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v9, a1 +; LMULMAX1-RV64-NEXT: vmv.s.x v10, a3 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmulh.vv v11, v10, v9 +; LMULMAX1-RV64-NEXT: vmulh.vv v11, v9, v10 ; LMULMAX1-RV64-NEXT: vid.v v12 ; LMULMAX1-RV64-NEXT: vrsub.vi v13, v12, 0 -; LMULMAX1-RV64-NEXT: vmacc.vv v11, v13, v10 -; LMULMAX1-RV64-NEXT: li a1, 63 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v11, a1 +; LMULMAX1-RV64-NEXT: vmacc.vv v11, v13, v9 +; LMULMAX1-RV64-NEXT: li a2, 63 +; LMULMAX1-RV64-NEXT: vsrl.vx v9, v11, a2 ; LMULMAX1-RV64-NEXT: vsra.vv v11, v11, v12 -; LMULMAX1-RV64-NEXT: vadd.vv v10, v11, v10 -; LMULMAX1-RV64-NEXT: vmulh.vv v9, v8, v9 -; LMULMAX1-RV64-NEXT: vmacc.vv v9, v8, v13 -; LMULMAX1-RV64-NEXT: vsrl.vx v8, v9, a1 -; LMULMAX1-RV64-NEXT: vsra.vv v9, v9, v12 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 +; LMULMAX1-RV64-NEXT: vadd.vv v9, v11, v9 +; LMULMAX1-RV64-NEXT: vmulh.vv v10, v8, v10 +; LMULMAX1-RV64-NEXT: vmacc.vv v10, v8, v13 +; LMULMAX1-RV64-NEXT: vsrl.vx v8, v10, a2 +; LMULMAX1-RV64-NEXT: vsra.vv v10, v10, v12 +; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v10, (a2) +; LMULMAX1-RV64-NEXT: vse64.v v9, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, ptr %x %b = sdiv <4 x i64> %a, @@ -8831,8 +8845,10 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: lui a1, %hi(.LCPI317_0) -; RV64-NEXT: ld a1, %lo(.LCPI317_0)(a1) +; RV64-NEXT: lui a1, 699051 +; RV64-NEXT: addiw a1, a1, -1365 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 ; RV64-NEXT: vmulhu.vx v8, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 1 ; RV64-NEXT: vse64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll --- a/llvm/test/CodeGen/RISCV/sextw-removal.ll +++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll @@ -322,15 +322,23 @@ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a2, %hi(.LCPI6_0) -; RV64I-NEXT: ld s0, %lo(.LCPI6_0)(a2) -; RV64I-NEXT: lui a2, %hi(.LCPI6_1) -; RV64I-NEXT: ld s1, %lo(.LCPI6_1)(a2) -; RV64I-NEXT: lui a2, %hi(.LCPI6_2) -; RV64I-NEXT: ld s2, %lo(.LCPI6_2)(a2) -; RV64I-NEXT: lui a2, %hi(.LCPI6_3) -; RV64I-NEXT: ld s3, %lo(.LCPI6_3)(a2) ; RV64I-NEXT: sraw a0, a0, a1 +; RV64I-NEXT: lui a1, 349525 +; RV64I-NEXT: addiw s0, a1, 1365 +; RV64I-NEXT: slli a1, s0, 32 +; RV64I-NEXT: add s0, s0, a1 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw s1, a1, 819 +; RV64I-NEXT: slli a1, s1, 32 +; RV64I-NEXT: add s1, s1, a1 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw s2, a1, -241 +; RV64I-NEXT: slli a1, s2, 32 +; RV64I-NEXT: add s2, s2, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw s3, a1, 257 +; RV64I-NEXT: slli a1, s3, 32 +; RV64I-NEXT: add s3, s3, a1 ; RV64I-NEXT: .LBB6_1: # %bb2 ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: call foo@plt diff --git a/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll 
b/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll --- a/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll @@ -31,27 +31,29 @@ ; ; RV64-LABEL: test_udiv_3: ; RV64: # %bb.0: -; RV64-NEXT: lui a2, %hi(.LCPI0_0) -; RV64-NEXT: ld a2, %lo(.LCPI0_0)(a2) -; RV64-NEXT: add a3, a0, a1 -; RV64-NEXT: sltu a4, a3, a0 +; RV64-NEXT: add a2, a0, a1 +; RV64-NEXT: sltu a3, a2, a0 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: lui a3, 699051 +; RV64-NEXT: addiw a3, a3, -1365 +; RV64-NEXT: slli a4, a3, 32 ; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: mulhu a4, a3, a2 +; RV64-NEXT: mulhu a4, a2, a3 ; RV64-NEXT: srli a5, a4, 1 ; RV64-NEXT: andi a4, a4, -2 -; RV64-NEXT: lui a6, %hi(.LCPI0_1) -; RV64-NEXT: ld a6, %lo(.LCPI0_1)(a6) +; RV64-NEXT: lui a6, %hi(.LCPI0_0) +; RV64-NEXT: ld a6, %lo(.LCPI0_0)(a6) ; RV64-NEXT: add a4, a4, a5 -; RV64-NEXT: sub a3, a3, a4 -; RV64-NEXT: sub a4, a0, a3 +; RV64-NEXT: sub a2, a2, a4 +; RV64-NEXT: sub a4, a0, a2 ; RV64-NEXT: mul a5, a4, a6 -; RV64-NEXT: mulhu a6, a4, a2 +; RV64-NEXT: mulhu a6, a4, a3 ; RV64-NEXT: add a5, a6, a5 -; RV64-NEXT: sltu a0, a0, a3 +; RV64-NEXT: sltu a0, a0, a2 ; RV64-NEXT: sub a1, a1, a0 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, a5, a1 -; RV64-NEXT: mul a0, a4, a2 +; RV64-NEXT: mul a0, a4, a3 ; RV64-NEXT: ret %a = udiv iXLen2 %x, 3 ret iXLen2 %a @@ -84,27 +86,29 @@ ; ; RV64-LABEL: test_udiv_5: ; RV64: # %bb.0: -; RV64-NEXT: lui a2, %hi(.LCPI1_0) -; RV64-NEXT: ld a2, %lo(.LCPI1_0)(a2) -; RV64-NEXT: add a3, a0, a1 -; RV64-NEXT: sltu a4, a3, a0 +; RV64-NEXT: add a2, a0, a1 +; RV64-NEXT: sltu a3, a2, a0 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: lui a3, 838861 +; RV64-NEXT: addiw a3, a3, -819 +; RV64-NEXT: slli a4, a3, 32 ; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: mulhu a4, a3, a2 +; RV64-NEXT: mulhu a4, a2, a3 ; RV64-NEXT: srli a5, a4, 2 ; RV64-NEXT: andi a4, a4, -4 -; RV64-NEXT: lui a6, %hi(.LCPI1_1) -; RV64-NEXT: ld a6, %lo(.LCPI1_1)(a6) +; RV64-NEXT: lui a6, %hi(.LCPI1_0) +; RV64-NEXT: ld a6, %lo(.LCPI1_0)(a6) ; RV64-NEXT: add a4, a4, a5 -; RV64-NEXT: sub a3, a3, a4 -; RV64-NEXT: sub a4, a0, a3 +; RV64-NEXT: sub a2, a2, a4 +; RV64-NEXT: sub a4, a0, a2 ; RV64-NEXT: mul a5, a4, a6 -; RV64-NEXT: mulhu a6, a4, a2 +; RV64-NEXT: mulhu a6, a4, a3 ; RV64-NEXT: add a5, a6, a5 -; RV64-NEXT: sltu a0, a0, a3 +; RV64-NEXT: sltu a0, a0, a2 ; RV64-NEXT: sub a1, a1, a0 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, a5, a1 -; RV64-NEXT: mul a0, a4, a2 +; RV64-NEXT: mul a0, a4, a3 ; RV64-NEXT: ret %a = udiv iXLen2 %x, 5 ret iXLen2 %a @@ -191,22 +195,26 @@ ; ; RV64-LABEL: test_udiv_15: ; RV64: # %bb.0: -; RV64-NEXT: lui a2, %hi(.LCPI4_0) -; RV64-NEXT: ld a2, %lo(.LCPI4_0)(a2) -; RV64-NEXT: add a3, a0, a1 -; RV64-NEXT: sltu a4, a3, a0 +; RV64-NEXT: add a2, a0, a1 +; RV64-NEXT: sltu a3, a2, a0 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: lui a3, 559241 +; RV64-NEXT: addiw a3, a3, -1911 +; RV64-NEXT: slli a4, a3, 32 ; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: mulhu a2, a3, a2 -; RV64-NEXT: srli a2, a2, 3 -; RV64-NEXT: slli a4, a2, 4 -; RV64-NEXT: sub a2, a2, a4 -; RV64-NEXT: lui a4, %hi(.LCPI4_1) -; RV64-NEXT: ld a4, %lo(.LCPI4_1)(a4) -; RV64-NEXT: lui a5, %hi(.LCPI4_2) -; RV64-NEXT: ld a5, %lo(.LCPI4_2)(a5) -; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: mulhu a3, a2, a3 +; RV64-NEXT: srli a3, a3, 3 +; RV64-NEXT: slli a4, a3, 4 +; RV64-NEXT: lui a5, %hi(.LCPI4_0) +; RV64-NEXT: ld a5, %lo(.LCPI4_0)(a5) +; RV64-NEXT: sub a3, a3, a4 +; RV64-NEXT: add a2, a2, a3 ; 
RV64-NEXT: sub a3, a0, a2 -; RV64-NEXT: mul a4, a3, a4 +; RV64-NEXT: mul a4, a3, a5 +; RV64-NEXT: lui a5, 978671 +; RV64-NEXT: addiw a5, a5, -273 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: mulhu a6, a3, a5 ; RV64-NEXT: add a4, a6, a4 ; RV64-NEXT: sltu a0, a0, a2 @@ -246,27 +254,29 @@ ; ; RV64-LABEL: test_udiv_17: ; RV64: # %bb.0: -; RV64-NEXT: lui a2, %hi(.LCPI5_0) -; RV64-NEXT: ld a2, %lo(.LCPI5_0)(a2) -; RV64-NEXT: add a3, a0, a1 -; RV64-NEXT: sltu a4, a3, a0 +; RV64-NEXT: add a2, a0, a1 +; RV64-NEXT: sltu a3, a2, a0 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: lui a3, 986895 +; RV64-NEXT: addiw a3, a3, 241 +; RV64-NEXT: slli a4, a3, 32 ; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: mulhu a4, a3, a2 +; RV64-NEXT: mulhu a4, a2, a3 ; RV64-NEXT: srli a5, a4, 4 ; RV64-NEXT: andi a4, a4, -16 -; RV64-NEXT: lui a6, %hi(.LCPI5_1) -; RV64-NEXT: ld a6, %lo(.LCPI5_1)(a6) +; RV64-NEXT: lui a6, %hi(.LCPI5_0) +; RV64-NEXT: ld a6, %lo(.LCPI5_0)(a6) ; RV64-NEXT: add a4, a4, a5 -; RV64-NEXT: sub a3, a3, a4 -; RV64-NEXT: sub a4, a0, a3 +; RV64-NEXT: sub a2, a2, a4 +; RV64-NEXT: sub a4, a0, a2 ; RV64-NEXT: mul a5, a4, a6 -; RV64-NEXT: mulhu a6, a4, a2 +; RV64-NEXT: mulhu a6, a4, a3 ; RV64-NEXT: add a5, a6, a5 -; RV64-NEXT: sltu a0, a0, a3 +; RV64-NEXT: sltu a0, a0, a2 ; RV64-NEXT: sub a1, a1, a0 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, a5, a1 -; RV64-NEXT: mul a0, a4, a2 +; RV64-NEXT: mul a0, a4, a3 ; RV64-NEXT: ret %a = udiv iXLen2 %x, 17 ret iXLen2 %a @@ -301,22 +311,26 @@ ; ; RV64-LABEL: test_udiv_255: ; RV64: # %bb.0: -; RV64-NEXT: lui a2, %hi(.LCPI6_0) -; RV64-NEXT: ld a2, %lo(.LCPI6_0)(a2) -; RV64-NEXT: add a3, a0, a1 -; RV64-NEXT: sltu a4, a3, a0 +; RV64-NEXT: add a2, a0, a1 +; RV64-NEXT: sltu a3, a2, a0 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: lui a3, 526344 +; RV64-NEXT: addiw a3, a3, 129 +; RV64-NEXT: slli a4, a3, 32 ; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: mulhu a2, a3, a2 -; RV64-NEXT: srli a2, a2, 7 -; RV64-NEXT: slli a4, a2, 8 -; RV64-NEXT: sub a2, a2, a4 -; RV64-NEXT: lui a4, %hi(.LCPI6_1) -; RV64-NEXT: ld a4, %lo(.LCPI6_1)(a4) -; RV64-NEXT: lui a5, %hi(.LCPI6_2) -; RV64-NEXT: ld a5, %lo(.LCPI6_2)(a5) -; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: mulhu a3, a2, a3 +; RV64-NEXT: srli a3, a3, 7 +; RV64-NEXT: slli a4, a3, 8 +; RV64-NEXT: lui a5, %hi(.LCPI6_0) +; RV64-NEXT: ld a5, %lo(.LCPI6_0)(a5) +; RV64-NEXT: sub a3, a3, a4 +; RV64-NEXT: add a2, a2, a3 ; RV64-NEXT: sub a3, a0, a2 -; RV64-NEXT: mul a4, a3, a4 +; RV64-NEXT: mul a4, a3, a5 +; RV64-NEXT: lui a5, 1044464 +; RV64-NEXT: addiw a5, a5, -257 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 ; RV64-NEXT: mulhu a6, a3, a5 ; RV64-NEXT: add a4, a6, a4 ; RV64-NEXT: sltu a0, a0, a2 @@ -356,27 +370,29 @@ ; ; RV64-LABEL: test_udiv_257: ; RV64: # %bb.0: -; RV64-NEXT: lui a2, %hi(.LCPI7_0) -; RV64-NEXT: ld a2, %lo(.LCPI7_0)(a2) -; RV64-NEXT: add a3, a0, a1 -; RV64-NEXT: sltu a4, a3, a0 +; RV64-NEXT: add a2, a0, a1 +; RV64-NEXT: sltu a3, a2, a0 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: lui a3, 1044496 +; RV64-NEXT: addiw a3, a3, -255 +; RV64-NEXT: slli a4, a3, 32 ; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: mulhu a4, a3, a2 +; RV64-NEXT: mulhu a4, a2, a3 ; RV64-NEXT: srli a5, a4, 8 ; RV64-NEXT: andi a4, a4, -256 -; RV64-NEXT: lui a6, %hi(.LCPI7_1) -; RV64-NEXT: ld a6, %lo(.LCPI7_1)(a6) +; RV64-NEXT: lui a6, %hi(.LCPI7_0) +; RV64-NEXT: ld a6, %lo(.LCPI7_0)(a6) ; RV64-NEXT: add a4, a4, a5 -; RV64-NEXT: sub a3, a3, a4 -; RV64-NEXT: sub a4, a0, a3 +; RV64-NEXT: sub a2, a2, a4 +; 
RV64-NEXT: sub a4, a0, a2 ; RV64-NEXT: mul a5, a4, a6 -; RV64-NEXT: mulhu a6, a4, a2 +; RV64-NEXT: mulhu a6, a4, a3 ; RV64-NEXT: add a5, a6, a5 -; RV64-NEXT: sltu a0, a0, a3 +; RV64-NEXT: sltu a0, a0, a2 ; RV64-NEXT: sub a1, a1, a0 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, a5, a1 -; RV64-NEXT: mul a0, a4, a2 +; RV64-NEXT: mul a0, a4, a3 ; RV64-NEXT: ret %a = udiv iXLen2 %x, 257 ret iXLen2 %a @@ -414,31 +430,36 @@ ; ; RV64-LABEL: test_udiv_65535: ; RV64: # %bb.0: -; RV64-NEXT: lui a2, %hi(.LCPI8_0) -; RV64-NEXT: ld a2, %lo(.LCPI8_0)(a2) -; RV64-NEXT: add a3, a0, a1 -; RV64-NEXT: sltu a4, a3, a0 +; RV64-NEXT: add a2, a0, a1 +; RV64-NEXT: sltu a3, a2, a0 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: lui a3, 524296 +; RV64-NEXT: addiw a3, a3, 1 +; RV64-NEXT: slli a4, a3, 32 ; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: mulhu a2, a3, a2 -; RV64-NEXT: srli a2, a2, 15 -; RV64-NEXT: slli a4, a2, 16 -; RV64-NEXT: sub a2, a2, a4 -; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: mulhu a3, a2, a3 +; RV64-NEXT: srli a3, a3, 15 +; RV64-NEXT: slli a4, a3, 16 +; RV64-NEXT: sub a3, a3, a4 +; RV64-NEXT: add a2, a2, a3 ; RV64-NEXT: sub a3, a0, a2 ; RV64-NEXT: lui a4, 983039 ; RV64-NEXT: slli a4, a4, 4 ; RV64-NEXT: addi a4, a4, -1 ; RV64-NEXT: slli a4, a4, 16 -; RV64-NEXT: addi a5, a4, -2 -; RV64-NEXT: mul a5, a3, a5 -; RV64-NEXT: addi a4, a4, -1 -; RV64-NEXT: mulhu a6, a3, a4 -; RV64-NEXT: add a5, a6, a5 +; RV64-NEXT: addi a4, a4, -2 +; RV64-NEXT: mul a4, a3, a4 +; RV64-NEXT: lui a5, 1048560 +; RV64-NEXT: addiw a5, a5, -1 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: mulhu a6, a3, a5 +; RV64-NEXT: add a4, a6, a4 ; RV64-NEXT: sltu a0, a0, a2 ; RV64-NEXT: sub a1, a1, a0 -; RV64-NEXT: mul a1, a1, a4 -; RV64-NEXT: add a1, a5, a1 -; RV64-NEXT: mul a0, a3, a4 +; RV64-NEXT: mul a1, a1, a5 +; RV64-NEXT: add a1, a4, a1 +; RV64-NEXT: mul a0, a3, a5 ; RV64-NEXT: ret %a = udiv iXLen2 %x, 65535 ret iXLen2 %a @@ -474,26 +495,28 @@ ; RV64-NEXT: add a2, a0, a1 ; RV64-NEXT: sltu a3, a2, a0 ; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: lui a3, 983041 -; RV64-NEXT: slli a3, a3, 4 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: slli a3, a3, 16 -; RV64-NEXT: addi a4, a3, 1 +; RV64-NEXT: lui a3, 1048560 +; RV64-NEXT: addiw a4, a3, 1 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: mulhu a5, a2, a4 -; RV64-NEXT: lui a6, 1048560 -; RV64-NEXT: and a6, a5, a6 +; RV64-NEXT: and a3, a5, a3 ; RV64-NEXT: srli a5, a5, 16 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: sub a2, a2, a3 +; RV64-NEXT: sub a3, a0, a2 +; RV64-NEXT: lui a5, 983041 +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: addi a5, a5, -1 +; RV64-NEXT: slli a5, a5, 16 +; RV64-NEXT: mul a5, a3, a5 +; RV64-NEXT: mulhu a6, a3, a4 ; RV64-NEXT: add a5, a6, a5 -; RV64-NEXT: sub a2, a2, a5 -; RV64-NEXT: sub a5, a0, a2 -; RV64-NEXT: mul a3, a5, a3 -; RV64-NEXT: mulhu a6, a5, a4 -; RV64-NEXT: add a3, a6, a3 ; RV64-NEXT: sltu a0, a0, a2 ; RV64-NEXT: sub a1, a1, a0 ; RV64-NEXT: mul a1, a1, a4 -; RV64-NEXT: add a1, a3, a1 -; RV64-NEXT: mul a0, a5, a4 +; RV64-NEXT: add a1, a5, a1 +; RV64-NEXT: mul a0, a3, a4 ; RV64-NEXT: ret %a = udiv iXLen2 %x, 65537 ret iXLen2 %a @@ -534,27 +557,29 @@ ; RV64-NEXT: srli a0, a0, 2 ; RV64-NEXT: or a0, a0, a2 ; RV64-NEXT: srli a1, a1, 2 -; RV64-NEXT: lui a2, %hi(.LCPI10_0) -; RV64-NEXT: ld a2, %lo(.LCPI10_0)(a2) -; RV64-NEXT: add a3, a0, a1 -; RV64-NEXT: sltu a4, a3, a0 +; RV64-NEXT: add a2, a0, a1 +; RV64-NEXT: sltu a3, a2, a0 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: lui a3, 699051 +; RV64-NEXT: 
addiw a3, a3, -1365 +; RV64-NEXT: slli a4, a3, 32 ; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: mulhu a4, a3, a2 +; RV64-NEXT: mulhu a4, a2, a3 ; RV64-NEXT: srli a5, a4, 1 ; RV64-NEXT: andi a4, a4, -2 -; RV64-NEXT: lui a6, %hi(.LCPI10_1) -; RV64-NEXT: ld a6, %lo(.LCPI10_1)(a6) +; RV64-NEXT: lui a6, %hi(.LCPI10_0) +; RV64-NEXT: ld a6, %lo(.LCPI10_0)(a6) ; RV64-NEXT: add a4, a4, a5 -; RV64-NEXT: sub a3, a3, a4 -; RV64-NEXT: sub a4, a0, a3 +; RV64-NEXT: sub a2, a2, a4 +; RV64-NEXT: sub a4, a0, a2 ; RV64-NEXT: mul a5, a4, a6 -; RV64-NEXT: mulhu a6, a4, a2 +; RV64-NEXT: mulhu a6, a4, a3 ; RV64-NEXT: add a5, a6, a5 -; RV64-NEXT: sltu a0, a0, a3 +; RV64-NEXT: sltu a0, a0, a2 ; RV64-NEXT: sub a1, a1, a0 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, a5, a1 -; RV64-NEXT: mul a0, a4, a2 +; RV64-NEXT: mul a0, a4, a3 ; RV64-NEXT: ret %a = udiv iXLen2 %x, 12 ret iXLen2 %a diff --git a/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll b/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll --- a/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll @@ -22,12 +22,14 @@ ; ; RV64-LABEL: test_urem_3: ; RV64: # %bb.0: -; RV64-NEXT: lui a2, %hi(.LCPI0_0) -; RV64-NEXT: ld a2, %lo(.LCPI0_0)(a2) ; RV64-NEXT: add a1, a0, a1 ; RV64-NEXT: sltu a0, a1, a0 ; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: mulhu a1, a0, a2 +; RV64-NEXT: lui a1, 699051 +; RV64-NEXT: addiw a1, a1, -1365 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: mulhu a1, a0, a1 ; RV64-NEXT: srli a2, a1, 1 ; RV64-NEXT: andi a1, a1, -2 ; RV64-NEXT: add a1, a1, a2 @@ -56,12 +58,14 @@ ; ; RV64-LABEL: test_urem_5: ; RV64: # %bb.0: -; RV64-NEXT: lui a2, %hi(.LCPI1_0) -; RV64-NEXT: ld a2, %lo(.LCPI1_0)(a2) ; RV64-NEXT: add a1, a0, a1 ; RV64-NEXT: sltu a0, a1, a0 ; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: mulhu a1, a0, a2 +; RV64-NEXT: lui a1, 838861 +; RV64-NEXT: addiw a1, a1, -819 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: mulhu a1, a0, a1 ; RV64-NEXT: srli a2, a1, 2 ; RV64-NEXT: andi a1, a1, -4 ; RV64-NEXT: add a1, a1, a2 @@ -142,12 +146,14 @@ ; ; RV64-LABEL: test_urem_15: ; RV64: # %bb.0: -; RV64-NEXT: lui a2, %hi(.LCPI4_0) -; RV64-NEXT: ld a2, %lo(.LCPI4_0)(a2) ; RV64-NEXT: add a1, a0, a1 ; RV64-NEXT: sltu a0, a1, a0 ; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: mulhu a1, a0, a2 +; RV64-NEXT: lui a1, 559241 +; RV64-NEXT: addiw a1, a1, -1911 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: mulhu a1, a0, a1 ; RV64-NEXT: srli a1, a1, 3 ; RV64-NEXT: slli a2, a1, 4 ; RV64-NEXT: sub a1, a1, a2 @@ -176,12 +182,14 @@ ; ; RV64-LABEL: test_urem_17: ; RV64: # %bb.0: -; RV64-NEXT: lui a2, %hi(.LCPI5_0) -; RV64-NEXT: ld a2, %lo(.LCPI5_0)(a2) ; RV64-NEXT: add a1, a0, a1 ; RV64-NEXT: sltu a0, a1, a0 ; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: mulhu a1, a0, a2 +; RV64-NEXT: lui a1, 986895 +; RV64-NEXT: addiw a1, a1, 241 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: mulhu a1, a0, a1 ; RV64-NEXT: srli a2, a1, 4 ; RV64-NEXT: andi a1, a1, -16 ; RV64-NEXT: add a1, a1, a2 @@ -210,12 +218,14 @@ ; ; RV64-LABEL: test_urem_255: ; RV64: # %bb.0: -; RV64-NEXT: lui a2, %hi(.LCPI6_0) -; RV64-NEXT: ld a2, %lo(.LCPI6_0)(a2) ; RV64-NEXT: add a1, a0, a1 ; RV64-NEXT: sltu a0, a1, a0 ; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: mulhu a1, a0, a2 +; RV64-NEXT: lui a1, 526344 +; RV64-NEXT: addiw a1, a1, 129 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: mulhu a1, a0, a1 ; RV64-NEXT: srli a1, a1, 7 ; 
RV64-NEXT: slli a2, a1, 8 ; RV64-NEXT: sub a1, a1, a2 @@ -244,12 +254,14 @@ ; ; RV64-LABEL: test_urem_257: ; RV64: # %bb.0: -; RV64-NEXT: lui a2, %hi(.LCPI7_0) -; RV64-NEXT: ld a2, %lo(.LCPI7_0)(a2) ; RV64-NEXT: add a1, a0, a1 ; RV64-NEXT: sltu a0, a1, a0 ; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: mulhu a1, a0, a2 +; RV64-NEXT: lui a1, 1044496 +; RV64-NEXT: addiw a1, a1, -255 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: mulhu a1, a0, a1 ; RV64-NEXT: srli a2, a1, 8 ; RV64-NEXT: andi a1, a1, -256 ; RV64-NEXT: add a1, a1, a2 @@ -278,12 +290,14 @@ ; ; RV64-LABEL: test_urem_65535: ; RV64: # %bb.0: -; RV64-NEXT: lui a2, %hi(.LCPI8_0) -; RV64-NEXT: ld a2, %lo(.LCPI8_0)(a2) ; RV64-NEXT: add a1, a0, a1 ; RV64-NEXT: sltu a0, a1, a0 ; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: mulhu a1, a0, a2 +; RV64-NEXT: lui a1, 524296 +; RV64-NEXT: addiw a1, a1, 1 +; RV64-NEXT: slli a2, a1, 32 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: mulhu a1, a0, a1 ; RV64-NEXT: srli a1, a1, 15 ; RV64-NEXT: slli a2, a1, 16 ; RV64-NEXT: sub a1, a1, a2 @@ -315,16 +329,14 @@ ; RV64-NEXT: add a1, a0, a1 ; RV64-NEXT: sltu a0, a1, a0 ; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: lui a1, 983041 -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi a1, a1, 1 -; RV64-NEXT: mulhu a1, a0, a1 -; RV64-NEXT: lui a2, 1048560 -; RV64-NEXT: and a2, a1, a2 -; RV64-NEXT: srli a1, a1, 16 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: lui a1, 1048560 +; RV64-NEXT: addiw a2, a1, 1 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: mulhu a2, a0, a2 +; RV64-NEXT: and a1, a2, a1 +; RV64-NEXT: srli a2, a2, 16 +; RV64-NEXT: add a1, a1, a2 ; RV64-NEXT: sub a0, a0, a1 ; RV64-NEXT: li a1, 0 ; RV64-NEXT: ret @@ -361,12 +373,14 @@ ; RV64-NEXT: srli a3, a0, 2 ; RV64-NEXT: or a2, a3, a2 ; RV64-NEXT: srli a1, a1, 2 -; RV64-NEXT: lui a3, %hi(.LCPI10_0) -; RV64-NEXT: ld a3, %lo(.LCPI10_0)(a3) ; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: sltu a2, a1, a2 ; RV64-NEXT: add a1, a1, a2 -; RV64-NEXT: mulhu a2, a1, a3 +; RV64-NEXT: lui a2, 699051 +; RV64-NEXT: addiw a2, a2, -1365 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: mulhu a2, a1, a2 ; RV64-NEXT: srli a3, a2, 1 ; RV64-NEXT: andi a2, a2, -2 ; RV64-NEXT: add a2, a2, a3 diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -409,12 +409,14 @@ ; RV64-NEXT: mv a0, s1 ; RV64-NEXT: call __moddi3@plt ; RV64-NEXT: mv s1, a0 -; RV64-NEXT: lui a0, %hi(.LCPI3_0) -; RV64-NEXT: ld a1, %lo(.LCPI3_0)(a0) +; RV64-NEXT: lui a0, 699051 +; RV64-NEXT: addiw a1, a0, -1365 +; RV64-NEXT: slli a0, a1, 32 +; RV64-NEXT: add a1, a1, a0 ; RV64-NEXT: mv a0, s2 ; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lui a1, %hi(.LCPI3_1) -; RV64-NEXT: ld a1, %lo(.LCPI3_1)(a1) +; RV64-NEXT: lui a1, %hi(.LCPI3_0) +; RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) ; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: slli a2, a0, 63 ; RV64-NEXT: srli a0, a0, 1 @@ -565,10 +567,12 @@ ; RV64M-NEXT: sub a3, a3, a5 ; RV64M-NEXT: addi a3, a3, -1 ; RV64M-NEXT: seqz a3, a3 -; RV64M-NEXT: lui a4, %hi(.LCPI3_2) -; RV64M-NEXT: ld a4, %lo(.LCPI3_2)(a4) -; RV64M-NEXT: lui a5, %hi(.LCPI3_3) -; RV64M-NEXT: ld a5, %lo(.LCPI3_3)(a5) +; RV64M-NEXT: lui a4, 699051 +; RV64M-NEXT: addiw a4, a4, -1365 +; RV64M-NEXT: slli a5, a4, 32 +; RV64M-NEXT: add a4, a4, a5 +; RV64M-NEXT: lui a5, %hi(.LCPI3_2) +; 
RV64M-NEXT: ld a5, %lo(.LCPI3_2)(a5) ; RV64M-NEXT: addi a2, a2, -2 ; RV64M-NEXT: seqz a2, a2 ; RV64M-NEXT: mul a1, a1, a4