diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -125,12 +125,37 @@
   CurDAG->RemoveDeadNodes();
 }
 
-static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm,
-                         const RISCVSubtarget &Subtarget) {
+static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL,
+                                         const MVT VT, int64_t Imm,
+                                         const RISCVSubtarget &Subtarget) {
+  assert(VT == MVT::i64 && "Expecting MVT::i64");
+  const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
+  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool(
+      ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT));
+  SDValue Addr = TLI->getAddr(CP, *CurDAG);
+  SDValue Offset = CurDAG->getTargetConstant(0, DL, VT);
+  // Since there is no data race, the chain can be the entry node.
+  SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset,
+                                        CurDAG->getEntryNode());
+  MachineFunction &MF = CurDAG->getMachineFunction();
+  MachineMemOperand *MemOp = MF.getMachineMemOperand(
+      MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
+      LLT(VT), CP->getAlign());
+  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp});
+  return Load;
+}
+
+static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
+                         int64_t Imm, const RISCVSubtarget &Subtarget) {
   MVT XLenVT = Subtarget.getXLenVT();
   RISCVMatInt::InstSeq Seq =
       RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
 
+  // If Imm is expensive to build, put it into the constant pool instead.
+  if (Subtarget.useConstantPoolForLargeInts() &&
+      Seq.size() > Subtarget.getMaxBuildIntsCost())
+    return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget);
+
   SDNode *Result = nullptr;
   SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
   for (RISCVMatInt::Inst &Inst : Seq) {
@@ -498,7 +523,7 @@
     if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
       Imm = SignExtend64(Imm, 32);
 
-    ReplaceNode(Node, selectImm(CurDAG, DL, Imm, *Subtarget));
+    ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
     return;
   }
   case ISD::FrameIndex: {
@@ -774,7 +799,7 @@
         ShiftedC1 = SignExtend64(ShiftedC1, 32);
 
       // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
-      SDNode *Imm = selectImm(CurDAG, DL, ShiftedC1, *Subtarget);
+      SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget);
       SDNode *SLLI =
           CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
                                  CurDAG->getTargetConstant(LeadingZeros, DL, VT));
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -462,6 +462,8 @@
                       SelectionDAG &DAG) const override;
   SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
                     SmallVectorImpl<SDValue> &InVals) const override;
 
+  template <class NodeTy>
+  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
 
   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                          Type *Ty) const override {
@@ -544,9 +546,6 @@
                     bool IsRet, CallLoweringInfo *CLI,
                     RISCVCCAssignFn Fn) const;
 
-  template <class NodeTy>
-  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
-
   SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
                            bool UseGOT) const;
   SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -157,6 +157,12 @@
   const LegalizerInfo *getLegalizerInfo() const override;
   const RegisterBankInfo *getRegBankInfo() const override;
 
+  bool useConstantPoolForLargeInts() const;
+
+  // Maximum cost used for building integers; if it is exceeded, the integer
+  // will be put into the constant pool.
+  unsigned getMaxBuildIntsCost() const;
+
   // Return the known range for the bit length of RVV data registers. A value
   // of 0 means nothing is known about that particular limit beyond what's
   // implied by the architecture.
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -50,6 +50,16 @@
     cl::desc("The maximum ELEN value to use for fixed length vectors."),
     cl::init(64), cl::Hidden);
 
+static cl::opt<bool> RISCVDisableUsingConstantPoolForLargeInts(
+    "riscv-disable-using-constant-pool-for-large-ints",
+    cl::desc("Disable using constant pool for large integers."),
+    cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> RISCVMaxBuildIntsCost(
+    "riscv-max-build-ints-cost",
+    cl::desc("The maximum cost used for building integers."), cl::init(0),
+    cl::Hidden);
+
 void RISCVSubtarget::anchor() {}
 
 RISCVSubtarget &
@@ -110,6 +120,21 @@
   return RegBankInfo.get();
 }
 
+bool RISCVSubtarget::useConstantPoolForLargeInts() const {
+  return !RISCVDisableUsingConstantPoolForLargeInts;
+}
+
+unsigned RISCVSubtarget::getMaxBuildIntsCost() const {
+  // Loading an integer from the constant pool needs two instructions (which is
+  // why the minimum cost is 2): an address calculation instruction and a load
+  // instruction. Usually, address calculation and the instructions used for
+  // building integers (addi, slli, etc.) can be done in one cycle, so here we
+  // set the default cost to (LoadLatency + 1) if no threshold is provided.
+  return RISCVMaxBuildIntsCost == 0
+             ?
getSchedModel().LoadLatency + 1 + : std::max(2, RISCVMaxBuildIntsCost); +} + unsigned RISCVSubtarget::getMaxRVVVectorSizeInBits() const { assert(hasVInstructions() && "Tried to get vector length without Zve or V extension support!"); diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll --- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll @@ -513,46 +513,24 @@ ; RV64I-NEXT: addi a1, a0, -1 ; RV64I-NEXT: not a0, a0 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI7_0) +; RV64I-NEXT: ld a1, %lo(.LCPI7_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI7_1) +; RV64I-NEXT: ld a2, %lo(.LCPI7_1)(a2) +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 13107 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: and a1, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a2, %hi(.LCPI7_2) +; RV64I-NEXT: ld a2, %lo(.LCPI7_2)(a2) +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 3855 -; RV64I-NEXT: addiw a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI7_3) +; RV64I-NEXT: ld a1, %lo(.LCPI7_3)(a1) ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -811,46 +789,24 @@ ; RV64I-NEXT: addi a1, a0, -1 ; RV64I-NEXT: not a0, a0 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI11_0) +; RV64I-NEXT: ld a1, %lo(.LCPI11_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI11_1) +; RV64I-NEXT: ld a2, %lo(.LCPI11_1)(a2) +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 13107 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: and a1, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a2, %hi(.LCPI11_2) +; RV64I-NEXT: 
ld a2, %lo(.LCPI11_2)(a2) +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 3855 -; RV64I-NEXT: addiw a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI11_3) +; RV64I-NEXT: ld a1, %lo(.LCPI11_3)(a1) ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -982,46 +938,24 @@ ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI13_0) +; RV64I-NEXT: ld a1, %lo(.LCPI13_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI13_1) +; RV64I-NEXT: ld a2, %lo(.LCPI13_1)(a2) +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 13107 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: and a1, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a2, %hi(.LCPI13_2) +; RV64I-NEXT: ld a2, %lo(.LCPI13_2)(a2) +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 3855 -; RV64I-NEXT: addiw a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI13_3) +; RV64I-NEXT: ld a1, %lo(.LCPI13_3)(a1) ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll --- a/llvm/test/CodeGen/RISCV/div-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll @@ -100,14 +100,8 @@ ; ; RV64-LABEL: udiv64_constant_no_add: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, 1035469 -; RV64-NEXT: addiw a1, a1, -819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -819 +; RV64-NEXT: lui a1, %hi(.LCPI2_0) +; RV64-NEXT: ld a1, %lo(.LCPI2_0)(a1) ; RV64-NEXT: mulhu a0, a0, a1 ; RV64-NEXT: srli a0, a0, 2 ; RV64-NEXT: ret @@ -129,14 +123,8 @@ ; ; RV64-LABEL: udiv64_constant_add: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, 4681 -; 
RV64-NEXT: addiw a1, a1, 585 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 585 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 585 -; RV64-NEXT: slli a1, a1, 13 -; RV64-NEXT: addi a1, a1, 1171 +; RV64-NEXT: lui a1, %hi(.LCPI3_0) +; RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) ; RV64-NEXT: mulhu a1, a0, a1 ; RV64-NEXT: sub a0, a0, a1 ; RV64-NEXT: srli a0, a0, 1 @@ -443,14 +431,8 @@ ; ; RV64-LABEL: sdiv64_constant_no_srai: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, 21845 -; RV64-NEXT: addiw a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1366 +; RV64-NEXT: lui a1, %hi(.LCPI12_0) +; RV64-NEXT: ld a1, %lo(.LCPI12_0)(a1) ; RV64-NEXT: mulh a0, a0, a1 ; RV64-NEXT: srli a1, a0, 63 ; RV64-NEXT: add a0, a0, a1 @@ -473,14 +455,8 @@ ; ; RV64-LABEL: sdiv64_constant_srai: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, 13107 -; RV64-NEXT: addiw a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 819 -; RV64-NEXT: slli a1, a1, 13 -; RV64-NEXT: addi a1, a1, 1639 +; RV64-NEXT: lui a1, %hi(.LCPI13_0) +; RV64-NEXT: ld a1, %lo(.LCPI13_0)(a1) ; RV64-NEXT: mulh a0, a0, a1 ; RV64-NEXT: srli a1, a0, 63 ; RV64-NEXT: srai a0, a0, 1 @@ -504,14 +480,8 @@ ; ; RV64-LABEL: sdiv64_constant_add_srai: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, 1017993 -; RV64-NEXT: addiw a1, a1, -1911 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1911 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1911 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1911 +; RV64-NEXT: lui a1, %hi(.LCPI14_0) +; RV64-NEXT: ld a1, %lo(.LCPI14_0)(a1) ; RV64-NEXT: mulh a1, a0, a1 ; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: srli a1, a0, 63 @@ -536,14 +506,8 @@ ; ; RV64-LABEL: sdiv64_constant_sub_srai: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, 21845 -; RV64-NEXT: addiw a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1365 +; RV64-NEXT: lui a1, %hi(.LCPI15_0) +; RV64-NEXT: ld a1, %lo(.LCPI15_0)(a1) ; RV64-NEXT: mulh a1, a0, a1 ; RV64-NEXT: sub a0, a1, a0 ; RV64-NEXT: srli a1, a0, 63 diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll --- a/llvm/test/CodeGen/RISCV/div.ll +++ b/llvm/test/CodeGen/RISCV/div.ll @@ -222,14 +222,8 @@ ; ; RV64IM-LABEL: udiv64_constant: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, 1035469 -; RV64IM-NEXT: addiw a1, a1, -819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -819 +; RV64IM-NEXT: lui a1, %hi(.LCPI5_0) +; RV64IM-NEXT: ld a1, %lo(.LCPI5_0)(a1) ; RV64IM-NEXT: mulhu a0, a0, a1 ; RV64IM-NEXT: srli a0, a0, 2 ; RV64IM-NEXT: ret @@ -866,14 +860,8 @@ ; ; RV64IM-LABEL: sdiv64_constant: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, 13107 -; RV64IM-NEXT: addiw a1, a1, 819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, 819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, 819 -; RV64IM-NEXT: slli a1, a1, 13 -; RV64IM-NEXT: addi a1, a1, 1639 +; RV64IM-NEXT: lui a1, %hi(.LCPI21_0) +; RV64IM-NEXT: ld a1, %lo(.LCPI21_0)(a1) ; RV64IM-NEXT: mulh a0, a0, a1 ; RV64IM-NEXT: srli a1, a0, 63 ; RV64IM-NEXT: srai a0, a0, 1 diff --git a/llvm/test/CodeGen/RISCV/double-imm.ll 
b/llvm/test/CodeGen/RISCV/double-imm.ll --- a/llvm/test/CodeGen/RISCV/double-imm.ll +++ b/llvm/test/CodeGen/RISCV/double-imm.ll @@ -5,9 +5,6 @@ ; RUN: | FileCheck -check-prefix=RV64IFD %s define double @double_imm() nounwind { -; TODO: Should probably prefer fld or ld on RV64 rather than materialising an -; expensive constant. -; ; RV32IFD-LABEL: double_imm: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: lui a0, 345155 @@ -18,14 +15,8 @@ ; ; RV64IFD-LABEL: double_imm: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: lui a0, 512 -; RV64IFD-NEXT: addiw a0, a0, 1169 -; RV64IFD-NEXT: slli a0, a0, 15 -; RV64IFD-NEXT: addi a0, a0, -299 -; RV64IFD-NEXT: slli a0, a0, 14 -; RV64IFD-NEXT: addi a0, a0, 1091 -; RV64IFD-NEXT: slli a0, a0, 12 -; RV64IFD-NEXT: addi a0, a0, -744 +; RV64IFD-NEXT: lui a0, %hi(.LCPI0_0) +; RV64IFD-NEXT: ld a0, %lo(.LCPI0_0)(a0) ; RV64IFD-NEXT: ret ret double 3.1415926535897931159979634685441851615905761718750 } diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll --- a/llvm/test/CodeGen/RISCV/imm.ll +++ b/llvm/test/CodeGen/RISCV/imm.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -riscv-disable-using-constant-pool-for-large-ints -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I -; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zba \ +; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+experimental-zba \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IZBA -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbs \ +; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+experimental-zbs \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IZBS ; Materializing constants diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -328,46 +328,24 @@ ; RV64I-NEXT: srli a1, a0, 32 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI5_0) +; RV64I-NEXT: ld a1, %lo(.LCPI5_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI5_1) +; RV64I-NEXT: ld a2, %lo(.LCPI5_1)(a2) +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 13107 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: and a1, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a2, %hi(.LCPI5_2) +; RV64I-NEXT: ld a2, %lo(.LCPI5_2)(a2) +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 3855 -; RV64I-NEXT: addiw a1, 
a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI5_3) +; RV64I-NEXT: ld a1, %lo(.LCPI5_3)(a1) ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -597,46 +575,24 @@ ; RV64I-NEXT: addi a1, a0, -1 ; RV64I-NEXT: not a0, a0 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI10_0) +; RV64I-NEXT: ld a1, %lo(.LCPI10_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI10_1) +; RV64I-NEXT: ld a2, %lo(.LCPI10_1)(a2) +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 13107 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: and a1, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a2, %hi(.LCPI10_2) +; RV64I-NEXT: ld a2, %lo(.LCPI10_2)(a2) +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 3855 -; RV64I-NEXT: addiw a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI10_3) +; RV64I-NEXT: ld a1, %lo(.LCPI10_3)(a1) ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -740,46 +696,24 @@ ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI13_0) +; RV64I-NEXT: ld a1, %lo(.LCPI13_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI13_1) +; RV64I-NEXT: ld a2, %lo(.LCPI13_1)(a2) +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 13107 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 
+; RV64I-NEXT: and a1, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a2, %hi(.LCPI13_2) +; RV64I-NEXT: ld a2, %lo(.LCPI13_2)(a2) +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 3855 -; RV64I-NEXT: addiw a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI13_3) +; RV64I-NEXT: ld a1, %lo(.LCPI13_3)(a1) ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll --- a/llvm/test/CodeGen/RISCV/rv64zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll @@ -35,26 +35,14 @@ define i64 @gorc1_i64(i64 %a) nounwind { ; RV64I-LABEL: gorc1_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 1 -; RV64I-NEXT: lui a3, 21845 -; RV64I-NEXT: addiw a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI1_0) +; RV64I-NEXT: ld a1, %lo(.LCPI1_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI1_1) +; RV64I-NEXT: ld a2, %lo(.LCPI1_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -103,26 +91,14 @@ define i64 @gorc2_i64(i64 %a) nounwind { ; RV64I-LABEL: gorc2_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 2 -; RV64I-NEXT: lui a3, 13107 -; RV64I-NEXT: addiw a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI3_0) +; RV64I-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI3_1) +; RV64I-NEXT: ld a2, %lo(.LCPI3_1)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 2 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -187,48 +163,24 @@ define i64 @gorc3_i64(i64 %a) nounwind { ; RV64I-LABEL: gorc3_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; 
RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 1 -; RV64I-NEXT: lui a3, 21845 -; RV64I-NEXT: addiw a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI5_0) +; RV64I-NEXT: ld a1, %lo(.LCPI5_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI5_1) +; RV64I-NEXT: ld a2, %lo(.LCPI5_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 2 -; RV64I-NEXT: lui a3, 13107 -; RV64I-NEXT: addiw a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI5_2) +; RV64I-NEXT: ld a1, %lo(.LCPI5_2)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI5_3) +; RV64I-NEXT: ld a2, %lo(.LCPI5_3)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 2 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -283,26 +235,14 @@ define i64 @gorc4_i64(i64 %a) nounwind { ; RV64I-LABEL: gorc4_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 1044721 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 4 -; RV64I-NEXT: lui a3, 3855 -; RV64I-NEXT: addiw a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI7_0) +; RV64I-NEXT: ld a1, %lo(.LCPI7_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI7_1) +; RV64I-NEXT: ld a2, %lo(.LCPI7_1)(a2) +; RV64I-NEXT: slli a3, a0, 4 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 4 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -367,48 +307,24 @@ define i64 @gorc5_i64(i64 %a) nounwind { ; RV64I-LABEL: gorc5_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 1 -; RV64I-NEXT: lui a3, 21845 -; RV64I-NEXT: addiw a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; 
RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI9_0) +; RV64I-NEXT: ld a1, %lo(.LCPI9_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI9_1) +; RV64I-NEXT: ld a2, %lo(.LCPI9_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 1044721 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 4 -; RV64I-NEXT: lui a3, 3855 -; RV64I-NEXT: addiw a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI9_2) +; RV64I-NEXT: ld a1, %lo(.LCPI9_2)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI9_3) +; RV64I-NEXT: ld a2, %lo(.LCPI9_3)(a2) +; RV64I-NEXT: slli a3, a0, 4 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 4 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -479,48 +395,24 @@ define i64 @gorc6_i64(i64 %a) nounwind { ; RV64I-LABEL: gorc6_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 2 -; RV64I-NEXT: lui a3, 13107 -; RV64I-NEXT: addiw a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI11_0) +; RV64I-NEXT: ld a1, %lo(.LCPI11_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI11_1) +; RV64I-NEXT: ld a2, %lo(.LCPI11_1)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 2 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 1044721 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 4 -; RV64I-NEXT: lui a3, 3855 -; RV64I-NEXT: addiw a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI11_2) +; RV64I-NEXT: ld a1, %lo(.LCPI11_2)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI11_3) +; RV64I-NEXT: ld a2, %lo(.LCPI11_3)(a2) +; RV64I-NEXT: slli a3, a0, 4 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 4 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -608,70 +500,34 @@ define i64 @gorc7_i64(i64 %a) nounwind { ; RV64I-LABEL: 
gorc7_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 1 -; RV64I-NEXT: lui a3, 21845 -; RV64I-NEXT: addiw a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI13_0) +; RV64I-NEXT: ld a1, %lo(.LCPI13_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI13_1) +; RV64I-NEXT: ld a2, %lo(.LCPI13_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 1 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 2 -; RV64I-NEXT: lui a3, 13107 -; RV64I-NEXT: addiw a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI13_2) +; RV64I-NEXT: ld a1, %lo(.LCPI13_2)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI13_3) +; RV64I-NEXT: ld a2, %lo(.LCPI13_3)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 2 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 1044721 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 4 -; RV64I-NEXT: lui a3, 3855 -; RV64I-NEXT: addiw a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: lui a1, %hi(.LCPI13_4) +; RV64I-NEXT: ld a1, %lo(.LCPI13_4)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI13_5) +; RV64I-NEXT: ld a2, %lo(.LCPI13_5)(a2) +; RV64I-NEXT: slli a3, a0, 4 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 4 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -732,22 +588,14 @@ define i64 @gorc8_i64(i64 %a) nounwind { ; RV64I-LABEL: gorc8_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 8 -; RV64I-NEXT: lui a2, 1044496 -; RV64I-NEXT: addiw a2, a2, -255 -; RV64I-NEXT: slli a2, a2, 16 -; RV64I-NEXT: addi a2, a2, -255 -; RV64I-NEXT: slli a2, a2, 16 -; RV64I-NEXT: addi a2, a2, -256 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 8 -; RV64I-NEXT: lui a3, 4080 -; RV64I-NEXT: addiw a3, a3, 255 -; RV64I-NEXT: slli a3, a3, 16 -; RV64I-NEXT: addi a3, a3, 255 -; RV64I-NEXT: slli a3, a3, 16 -; RV64I-NEXT: addi a3, a3, 255 -; RV64I-NEXT: and a2, a2, a3 +; 
RV64I-NEXT: lui a1, %hi(.LCPI15_0) +; RV64I-NEXT: ld a1, %lo(.LCPI15_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI15_1) +; RV64I-NEXT: ld a2, %lo(.LCPI15_1)(a2) +; RV64I-NEXT: slli a3, a0, 8 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 8 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -917,32 +765,20 @@ define i64 @gorc2b_i64(i64 %a) nounwind { ; RV64I-LABEL: gorc2b_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI22_0) +; RV64I-NEXT: ld a1, %lo(.LCPI22_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI22_1) +; RV64I-NEXT: ld a2, %lo(.LCPI22_1)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a3, a3, a1 +; RV64I-NEXT: srli a4, a0, 2 +; RV64I-NEXT: and a4, a4, a2 +; RV64I-NEXT: or a0, a4, a0 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a3, a0, 2 -; RV64I-NEXT: lui a4, 13107 -; RV64I-NEXT: addiw a4, a4, 819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 819 -; RV64I-NEXT: and a3, a3, a4 -; RV64I-NEXT: or a0, a3, a0 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 2 -; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -1028,54 +864,30 @@ define i64 @gorc3b_i64(i64 %a) nounwind { ; RV64I-LABEL: gorc3b_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI24_0) +; RV64I-NEXT: ld a1, %lo(.LCPI24_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI24_1) +; RV64I-NEXT: ld a2, %lo(.LCPI24_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a3, a3, a1 +; RV64I-NEXT: srli a4, a0, 1 +; RV64I-NEXT: and a4, a4, a2 +; RV64I-NEXT: or a0, a4, a0 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: lui a3, %hi(.LCPI24_2) +; RV64I-NEXT: ld a3, %lo(.LCPI24_2)(a3) +; RV64I-NEXT: lui a4, %hi(.LCPI24_3) +; RV64I-NEXT: ld a4, %lo(.LCPI24_3)(a4) +; RV64I-NEXT: slli a5, a0, 2 +; RV64I-NEXT: and a3, a5, a3 +; RV64I-NEXT: srli a5, a0, 2 +; RV64I-NEXT: and a4, a5, a4 +; RV64I-NEXT: or a0, a4, a0 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a3, a0, 1 -; RV64I-NEXT: lui a4, 21845 -; RV64I-NEXT: addiw a4, a4, 1365 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 1365 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 1365 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 1365 -; RV64I-NEXT: and a3, a3, a4 -; RV64I-NEXT: or a0, a3, a0 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a3, 1035469 -; RV64I-NEXT: addiw a3, a3, -819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -819 -; RV64I-NEXT: slli a3, a3, 12 -; 
RV64I-NEXT: addi a3, a3, -820 -; RV64I-NEXT: and a1, a1, a3 -; RV64I-NEXT: srli a3, a0, 2 -; RV64I-NEXT: lui a5, 13107 -; RV64I-NEXT: addiw a5, a5, 819 -; RV64I-NEXT: slli a5, a5, 12 -; RV64I-NEXT: addi a5, a5, 819 -; RV64I-NEXT: slli a5, a5, 12 -; RV64I-NEXT: addi a5, a5, 819 -; RV64I-NEXT: slli a5, a5, 12 -; RV64I-NEXT: addi a5, a5, 819 -; RV64I-NEXT: and a3, a3, a5 -; RV64I-NEXT: or a0, a3, a0 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 1 -; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -1170,25 +982,13 @@ define i64 @grev1_i64(i64 %a) nounwind { ; RV64I-LABEL: grev1_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI28_0) +; RV64I-NEXT: ld a1, %lo(.LCPI28_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI28_1) +; RV64I-NEXT: ld a2, %lo(.LCPI28_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -1234,25 +1034,13 @@ define i64 @grev2_i64(i64 %a) nounwind { ; RV64I-LABEL: grev2_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI30_0) +; RV64I-NEXT: ld a1, %lo(.LCPI30_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI30_1) +; RV64I-NEXT: ld a2, %lo(.LCPI30_1)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: lui a2, 13107 -; RV64I-NEXT: addiw a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -1312,46 +1100,22 @@ define i64 @grev3_i64(i64 %a) nounwind { ; RV64I-LABEL: grev3_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI32_0) +; RV64I-NEXT: ld a1, %lo(.LCPI32_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI32_1) +; RV64I-NEXT: ld a2, %lo(.LCPI32_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 
12 -; RV64I-NEXT: addi a2, a2, 1365 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI32_2) +; RV64I-NEXT: ld a1, %lo(.LCPI32_2)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI32_3) +; RV64I-NEXT: ld a2, %lo(.LCPI32_3)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: lui a2, 13107 -; RV64I-NEXT: addiw a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -1402,25 +1166,13 @@ define i64 @grev4_i64(i64 %a) nounwind { ; RV64I-LABEL: grev4_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 1044721 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI34_0) +; RV64I-NEXT: ld a1, %lo(.LCPI34_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI34_1) +; RV64I-NEXT: ld a2, %lo(.LCPI34_1)(a2) +; RV64I-NEXT: slli a3, a0, 4 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 4 -; RV64I-NEXT: lui a2, 3855 -; RV64I-NEXT: addiw a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -1480,46 +1232,22 @@ define i64 @grev5_i64(i64 %a) nounwind { ; RV64I-LABEL: grev5_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI36_0) +; RV64I-NEXT: ld a1, %lo(.LCPI36_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI36_1) +; RV64I-NEXT: ld a2, %lo(.LCPI36_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 1044721 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI36_2) +; RV64I-NEXT: ld a1, %lo(.LCPI36_2)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI36_3) +; RV64I-NEXT: ld a2, %lo(.LCPI36_3)(a2) +; RV64I-NEXT: slli a3, a0, 4 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 4 -; RV64I-NEXT: lui a2, 
3855 -; RV64I-NEXT: addiw a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -1585,46 +1313,22 @@ define i64 @grev6_i64(i64 %a) nounwind { ; RV64I-LABEL: grev6_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI38_0) +; RV64I-NEXT: ld a1, %lo(.LCPI38_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI38_1) +; RV64I-NEXT: ld a2, %lo(.LCPI38_1)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: lui a2, 13107 -; RV64I-NEXT: addiw a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 1044721 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI38_2) +; RV64I-NEXT: ld a1, %lo(.LCPI38_2)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI38_3) +; RV64I-NEXT: ld a2, %lo(.LCPI38_3)(a2) +; RV64I-NEXT: slli a3, a0, 4 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 4 -; RV64I-NEXT: lui a2, 3855 -; RV64I-NEXT: addiw a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -1703,67 +1407,31 @@ define i64 @grev7_i64(i64 %a) nounwind { ; RV64I-LABEL: grev7_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI40_0) +; RV64I-NEXT: ld a1, %lo(.LCPI40_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI40_1) +; RV64I-NEXT: ld a2, %lo(.LCPI40_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI40_2) +; RV64I-NEXT: ld a1, 
%lo(.LCPI40_2)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI40_3) +; RV64I-NEXT: ld a2, %lo(.LCPI40_3)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: lui a2, 13107 -; RV64I-NEXT: addiw a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 4 -; RV64I-NEXT: lui a2, 1044721 -; RV64I-NEXT: addiw a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 240 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI40_4) +; RV64I-NEXT: ld a1, %lo(.LCPI40_4)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI40_5) +; RV64I-NEXT: ld a2, %lo(.LCPI40_5)(a2) +; RV64I-NEXT: slli a3, a0, 4 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 4 -; RV64I-NEXT: lui a2, 3855 -; RV64I-NEXT: addiw a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -1819,21 +1487,13 @@ define i64 @grev8_i64(i64 %a) nounwind { ; RV64I-LABEL: grev8_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 8 -; RV64I-NEXT: lui a2, 1044496 -; RV64I-NEXT: addiw a2, a2, -255 -; RV64I-NEXT: slli a2, a2, 16 -; RV64I-NEXT: addi a2, a2, -255 -; RV64I-NEXT: slli a2, a2, 16 -; RV64I-NEXT: addi a2, a2, -256 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI42_0) +; RV64I-NEXT: ld a1, %lo(.LCPI42_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI42_1) +; RV64I-NEXT: ld a2, %lo(.LCPI42_1)(a2) +; RV64I-NEXT: slli a3, a0, 8 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: lui a2, 4080 -; RV64I-NEXT: addiw a2, a2, 255 -; RV64I-NEXT: slli a2, a2, 16 -; RV64I-NEXT: addi a2, a2, 255 -; RV64I-NEXT: slli a2, a2, 16 -; RV64I-NEXT: addi a2, a2, 255 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -1993,46 +1653,22 @@ define i64 @grev3b_i64(i64 %a) nounwind { ; RV64I-LABEL: grev3b_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a2, 1035469 -; RV64I-NEXT: addiw a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -820 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI49_0) +; RV64I-NEXT: ld a1, %lo(.LCPI49_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI49_1) +; RV64I-NEXT: ld a2, %lo(.LCPI49_1)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: lui a2, 13107 -; RV64I-NEXT: addiw a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, 
a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI49_2) +; RV64I-NEXT: ld a1, %lo(.LCPI49_2)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI49_3) +; RV64I-NEXT: ld a2, %lo(.LCPI49_3)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret @@ -2109,52 +1745,28 @@ define i64 @grev2b_i64(i64 %a) nounwind { ; RV64I-LABEL: grev2b_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI51_0) +; RV64I-NEXT: ld a1, %lo(.LCPI51_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI51_1) +; RV64I-NEXT: ld a2, %lo(.LCPI51_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a3, a3, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: lui a3, 21845 -; RV64I-NEXT: addiw a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: and a0, a0, a3 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a4, 1035469 -; RV64I-NEXT: addiw a4, a4, -819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, -819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, -819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, -820 -; RV64I-NEXT: and a1, a1, a4 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: lui a3, %hi(.LCPI51_2) +; RV64I-NEXT: ld a3, %lo(.LCPI51_2)(a3) +; RV64I-NEXT: lui a4, %hi(.LCPI51_3) +; RV64I-NEXT: ld a4, %lo(.LCPI51_3)(a4) +; RV64I-NEXT: slli a5, a0, 2 +; RV64I-NEXT: and a3, a5, a3 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: lui a4, 13107 -; RV64I-NEXT: addiw a4, a4, 819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 819 ; RV64I-NEXT: and a0, a0, a4 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; @@ -2244,57 +1856,33 @@ define i64 @grev0_i64(i64 %a) nounwind { ; RV64I-LABEL: grev0_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: lui a2, 1026731 -; RV64I-NEXT: addiw a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1366 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI53_0) +; RV64I-NEXT: ld a1, %lo(.LCPI53_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI53_1) +; RV64I-NEXT: ld a2, %lo(.LCPI53_1)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a3, a3, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: lui a3, 21845 -; RV64I-NEXT: addiw a3, a3, 1365 -; RV64I-NEXT: slli 
a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: and a0, a0, a3 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: lui a4, 1035469 -; RV64I-NEXT: addiw a4, a4, -819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, -819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, -819 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, -820 -; RV64I-NEXT: and a1, a1, a4 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: lui a3, %hi(.LCPI53_2) +; RV64I-NEXT: ld a3, %lo(.LCPI53_2)(a3) +; RV64I-NEXT: lui a4, %hi(.LCPI53_3) +; RV64I-NEXT: ld a4, %lo(.LCPI53_3)(a4) +; RV64I-NEXT: slli a5, a0, 2 +; RV64I-NEXT: and a5, a5, a3 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: lui a5, 13107 -; RV64I-NEXT: addiw a5, a5, 819 -; RV64I-NEXT: slli a5, a5, 12 -; RV64I-NEXT: addi a5, a5, 819 -; RV64I-NEXT: slli a5, a5, 12 -; RV64I-NEXT: addi a5, a5, 819 -; RV64I-NEXT: slli a5, a5, 12 -; RV64I-NEXT: addi a5, a5, 819 -; RV64I-NEXT: and a0, a0, a5 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a1, a0, 1 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a4 +; RV64I-NEXT: or a0, a5, a0 +; RV64I-NEXT: slli a5, a0, 1 +; RV64I-NEXT: and a1, a5, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: slli a1, a0, 2 -; RV64I-NEXT: and a1, a1, a4 +; RV64I-NEXT: and a1, a1, a3 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a5 +; RV64I-NEXT: and a0, a0, a4 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; @@ -2676,43 +2264,25 @@ ; RV64I-NEXT: and a3, a4, a3 ; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: lui a3, %hi(.LCPI64_0) +; RV64I-NEXT: ld a3, %lo(.LCPI64_0)(a3) ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: lui a2, 3855 -; RV64I-NEXT: addiw a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 241 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -241 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: lui a2, %hi(.LCPI64_1) +; RV64I-NEXT: ld a2, %lo(.LCPI64_1)(a2) ; RV64I-NEXT: slli a0, a0, 4 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 2 -; RV64I-NEXT: lui a2, 13107 -; RV64I-NEXT: addiw a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 819 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a2, %hi(.LCPI64_2) +; RV64I-NEXT: ld a2, %lo(.LCPI64_2)(a2) ; RV64I-NEXT: slli a0, a0, 2 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 21845 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, 1365 ; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: slli a0, a0, 1 @@ -2838,94 +2408,76 @@ define i64 @bitreverse_bswap_i64(i64 %a) { ; RV64I-LABEL: bitreverse_bswap_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: srli a2, a0, 24 +; RV64I-NEXT: srli a1, a0, 24 ; RV64I-NEXT: lui a6, 4080 -; RV64I-NEXT: and a3, a2, a6 
-; RV64I-NEXT: srli a4, a0, 8 -; RV64I-NEXT: li a1, 255 -; RV64I-NEXT: slli a7, a1, 24 -; RV64I-NEXT: and a4, a4, a7 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: srli a4, a0, 40 -; RV64I-NEXT: lui a5, 16 -; RV64I-NEXT: addiw a5, a5, -256 -; RV64I-NEXT: and a4, a4, a5 -; RV64I-NEXT: srli a2, a0, 56 -; RV64I-NEXT: or a2, a4, a2 -; RV64I-NEXT: or a2, a3, a2 -; RV64I-NEXT: slli a4, a0, 24 -; RV64I-NEXT: slli t0, a1, 40 -; RV64I-NEXT: and a4, a4, t0 -; RV64I-NEXT: srliw a3, a0, 24 -; RV64I-NEXT: slli a3, a3, 32 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: slli a4, a0, 40 -; RV64I-NEXT: slli a1, a1, 48 -; RV64I-NEXT: and a4, a4, a1 +; RV64I-NEXT: and a1, a1, a6 +; RV64I-NEXT: srli a3, a0, 8 +; RV64I-NEXT: li a4, 255 +; RV64I-NEXT: slli a7, a4, 24 +; RV64I-NEXT: and a3, a3, a7 +; RV64I-NEXT: or a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 40 +; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: addiw a2, a2, -256 +; RV64I-NEXT: and a3, a3, a2 +; RV64I-NEXT: srli a5, a0, 56 +; RV64I-NEXT: or a3, a3, a5 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: slli a3, a0, 24 +; RV64I-NEXT: slli t0, a4, 40 +; RV64I-NEXT: and a3, a3, t0 +; RV64I-NEXT: srliw a5, a0, 24 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a3, a3, a5 +; RV64I-NEXT: slli a5, a0, 40 +; RV64I-NEXT: slli a4, a4, 48 +; RV64I-NEXT: and a5, a5, a4 ; RV64I-NEXT: slli a0, a0, 56 -; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: lui a5, %hi(.LCPI68_0) +; RV64I-NEXT: ld a5, %lo(.LCPI68_0)(a5) ; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: srli a2, a0, 4 -; RV64I-NEXT: lui a3, 3855 -; RV64I-NEXT: addiw a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 241 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, -241 -; RV64I-NEXT: and a2, a2, a3 -; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: and a1, a1, a5 +; RV64I-NEXT: and a0, a0, a5 +; RV64I-NEXT: lui a3, %hi(.LCPI68_1) +; RV64I-NEXT: ld a3, %lo(.LCPI68_1)(a3) ; RV64I-NEXT: slli a0, a0, 4 -; RV64I-NEXT: or a0, a2, a0 -; RV64I-NEXT: srli a2, a0, 2 -; RV64I-NEXT: lui a3, 13107 -; RV64I-NEXT: addiw a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 819 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: and a1, a1, a3 ; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: lui a3, %hi(.LCPI68_2) +; RV64I-NEXT: ld a3, %lo(.LCPI68_2)(a3) ; RV64I-NEXT: slli a0, a0, 2 -; RV64I-NEXT: or a0, a2, a0 -; RV64I-NEXT: srli a2, a0, 1 -; RV64I-NEXT: lui a3, 21845 -; RV64I-NEXT: addiw a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 1365 -; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: and a1, a1, a3 ; RV64I-NEXT: and a0, a0, a3 ; RV64I-NEXT: slli a0, a0, 1 -; RV64I-NEXT: or a0, a2, a0 -; RV64I-NEXT: srli a2, a0, 40 -; RV64I-NEXT: and a2, a2, a5 -; RV64I-NEXT: srli a3, a0, 56 -; RV64I-NEXT: or a2, a2, a3 -; RV64I-NEXT: srli a3, a0, 24 -; RV64I-NEXT: and a3, a3, a6 -; RV64I-NEXT: srli a4, a0, 8 -; RV64I-NEXT: and a4, a4, a7 -; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 40 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli 
a2, a0, 56 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 24 +; RV64I-NEXT: and a2, a2, a6 +; RV64I-NEXT: srli a3, a0, 8 +; RV64I-NEXT: and a3, a3, a7 ; RV64I-NEXT: or a2, a3, a2 -; RV64I-NEXT: slli a3, a0, 24 -; RV64I-NEXT: and a3, a3, t0 -; RV64I-NEXT: srliw a4, a0, 24 -; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a4, a0, 40 -; RV64I-NEXT: and a1, a4, a1 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: slli a2, a0, 24 +; RV64I-NEXT: and a2, a2, t0 +; RV64I-NEXT: srliw a3, a0, 24 +; RV64I-NEXT: slli a3, a3, 32 +; RV64I-NEXT: or a2, a2, a3 +; RV64I-NEXT: slli a3, a0, 40 +; RV64I-NEXT: and a3, a3, a4 ; RV64I-NEXT: slli a0, a0, 56 -; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBP-LABEL: bitreverse_bswap_i64: @@ -2972,30 +2524,18 @@ define i64 @shfl1_i64(i64 %a, i64 %b) nounwind { ; RV64I-LABEL: shfl1_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, 1035469 -; RV64I-NEXT: addiw a1, a1, -819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -819 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -819 -; RV64I-NEXT: slli a1, a1, 13 -; RV64I-NEXT: addi a1, a1, -1639 -; RV64I-NEXT: and a1, a0, a1 -; RV64I-NEXT: slli a2, a0, 1 -; RV64I-NEXT: lui a3, 4369 -; RV64I-NEXT: addiw a3, a3, 273 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 273 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: addi a3, a3, 273 -; RV64I-NEXT: slli a4, a3, 14 -; RV64I-NEXT: addi a4, a4, 1092 -; RV64I-NEXT: and a2, a2, a4 -; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI70_1) +; RV64I-NEXT: ld a1, %lo(.LCPI70_1)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI70_0) +; RV64I-NEXT: ld a2, %lo(.LCPI70_0)(a2) +; RV64I-NEXT: slli a3, a0, 1 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: lui a3, %hi(.LCPI70_2) +; RV64I-NEXT: ld a3, %lo(.LCPI70_2)(a3) +; RV64I-NEXT: and a2, a0, a2 +; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: slli a2, a3, 13 -; RV64I-NEXT: addi a2, a2, 546 -; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: and a0, a0, a3 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; @@ -3048,31 +2588,18 @@ define i64 @shfl2_i64(i64 %a, i64 %b) nounwind { ; RV64I-LABEL: shfl2_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, 1044721 -; RV64I-NEXT: addiw a1, a1, -241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 241 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, -241 -; RV64I-NEXT: slli a1, a1, 14 -; RV64I-NEXT: addi a1, a1, 963 -; RV64I-NEXT: and a1, a0, a1 -; RV64I-NEXT: slli a2, a0, 2 -; RV64I-NEXT: lui a3, 197379 -; RV64I-NEXT: slli a3, a3, 4 -; RV64I-NEXT: addi a3, a3, 771 -; RV64I-NEXT: slli a4, a3, 16 -; RV64I-NEXT: addi a4, a4, 771 -; RV64I-NEXT: slli a4, a4, 12 -; RV64I-NEXT: addi a4, a4, 48 -; RV64I-NEXT: and a2, a2, a4 -; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI72_1) +; RV64I-NEXT: ld a1, %lo(.LCPI72_1)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI72_0) +; RV64I-NEXT: ld a2, %lo(.LCPI72_0)(a2) +; RV64I-NEXT: slli a3, a0, 2 +; RV64I-NEXT: and a1, a3, a1 +; RV64I-NEXT: lui a3, %hi(.LCPI72_2) +; RV64I-NEXT: ld a3, %lo(.LCPI72_2)(a3) +; RV64I-NEXT: and a2, a0, a2 +; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: slli a2, a3, 14 -; RV64I-NEXT: addi a2, a2, 193 -; RV64I-NEXT: slli a2, a2, 12 -; RV64I-NEXT: addi a2, a2, -1012 -; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: and a0, a0, a3 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; @@ -3125,31 +2652,17 @@ define i64 @shfl4_i64(i64 %a, i64 %b) nounwind { ; 
RV64I-LABEL: shfl4_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, 983295 -; RV64I-NEXT: slli a1, a1, 4 -; RV64I-NEXT: addi a1, a1, 255 -; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: addi a1, a1, 255 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 15 +; RV64I-NEXT: lui a1, %hi(.LCPI74_0) +; RV64I-NEXT: ld a1, %lo(.LCPI74_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI74_1) +; RV64I-NEXT: ld a2, %lo(.LCPI74_1)(a2) +; RV64I-NEXT: slli a3, a0, 4 +; RV64I-NEXT: lui a4, %hi(.LCPI74_2) +; RV64I-NEXT: ld a4, %lo(.LCPI74_2)(a4) +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: and a1, a0, a1 -; RV64I-NEXT: slli a2, a0, 4 -; RV64I-NEXT: lui a3, 983055 -; RV64I-NEXT: slli a3, a3, 4 -; RV64I-NEXT: addi a3, a3, 15 -; RV64I-NEXT: slli a3, a3, 16 -; RV64I-NEXT: addi a3, a3, 15 -; RV64I-NEXT: slli a3, a3, 12 -; RV64I-NEXT: srli a3, a3, 4 -; RV64I-NEXT: and a2, a2, a3 ; RV64I-NEXT: srli a0, a0, 4 -; RV64I-NEXT: lui a3, 240 -; RV64I-NEXT: addiw a3, a3, 15 -; RV64I-NEXT: slli a3, a3, 16 -; RV64I-NEXT: addi a3, a3, 15 -; RV64I-NEXT: slli a3, a3, 20 -; RV64I-NEXT: addi a3, a3, 240 -; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: and a0, a0, a4 ; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll @@ -1090,43 +1090,25 @@ ; RV64-NEXT: slli a0, a2, 48 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v11, v8 +; RV64-NEXT: lui a0, %hi(.LCPI18_0) +; RV64-NEXT: ld a0, %lo(.LCPI18_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: lui a0, %hi(.LCPI18_1) +; RV64-NEXT: ld a0, %lo(.LCPI18_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: lui a0, %hi(.LCPI18_2) +; RV64-NEXT: ld a0, %lo(.LCPI18_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -1253,43 +1235,25 @@ ; RV64-NEXT: slli a0, a2, 48 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v14, v8 +; RV64-NEXT: lui a0, %hi(.LCPI19_0) +; RV64-NEXT: ld a0, %lo(.LCPI19_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; 
RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: lui a0, %hi(.LCPI19_1) +; RV64-NEXT: ld a0, %lo(.LCPI19_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: lui a0, %hi(.LCPI19_2) +; RV64-NEXT: ld a0, %lo(.LCPI19_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -1416,43 +1380,25 @@ ; RV64-NEXT: slli a0, a2, 48 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v20, v8 +; RV64-NEXT: lui a0, %hi(.LCPI20_0) +; RV64-NEXT: ld a0, %lo(.LCPI20_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: lui a0, %hi(.LCPI20_1) +; RV64-NEXT: ld a0, %lo(.LCPI20_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v12, v8 ; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: lui a0, %hi(.LCPI20_2) +; RV64-NEXT: ld a0, %lo(.LCPI20_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v12, v8 ; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 @@ -1609,43 +1555,25 @@ ; RV64-NEXT: slli a0, a2, 48 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v0, v8 +; RV64-NEXT: lui a0, %hi(.LCPI21_0) +; RV64-NEXT: ld a0, %lo(.LCPI21_0)(a0) ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: lui a0, %hi(.LCPI21_1) +; RV64-NEXT: ld a0, %lo(.LCPI21_1)(a0) ; RV64-NEXT: vsll.vi v8, v8, 4 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; 
RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: lui a0, %hi(.LCPI21_2) +; RV64-NEXT: ld a0, %lo(.LCPI21_2)(a0) ; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll @@ -1653,47 +1653,25 @@ ; RV64-NEXT: vsrl.vx v9, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: lui a0, %hi(.LCPI18_0) +; RV64-NEXT: ld a0, %lo(.LCPI18_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI18_1) +; RV64-NEXT: ld a1, %lo(.LCPI18_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 +; RV64-NEXT: vand.vx v9, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v9, v8 +; RV64-NEXT: lui a0, %hi(.LCPI18_2) +; RV64-NEXT: ld a0, %lo(.LCPI18_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI18_3) +; RV64-NEXT: ld a1, %lo(.LCPI18_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1779,47 +1757,25 @@ ; RV64-NEXT: vsrl.vx v10, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: lui a0, %hi(.LCPI19_0) +; RV64-NEXT: ld a0, %lo(.LCPI19_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI19_1) +; RV64-NEXT: ld a1, %lo(.LCPI19_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: 
addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 +; RV64-NEXT: vand.vx v10, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v10, v8 +; RV64-NEXT: lui a0, %hi(.LCPI19_2) +; RV64-NEXT: ld a0, %lo(.LCPI19_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI19_3) +; RV64-NEXT: ld a1, %lo(.LCPI19_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1905,47 +1861,25 @@ ; RV64-NEXT: vsrl.vx v12, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: lui a0, %hi(.LCPI20_0) +; RV64-NEXT: ld a0, %lo(.LCPI20_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI20_1) +; RV64-NEXT: ld a1, %lo(.LCPI20_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 +; RV64-NEXT: vand.vx v12, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v12, v8 +; RV64-NEXT: lui a0, %hi(.LCPI20_2) +; RV64-NEXT: ld a0, %lo(.LCPI20_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI20_3) +; RV64-NEXT: ld a1, %lo(.LCPI20_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2031,47 +1965,25 @@ ; RV64-NEXT: vsrl.vx v16, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: lui a0, %hi(.LCPI21_0) +; RV64-NEXT: ld a0, %lo(.LCPI21_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI21_1) +; RV64-NEXT: ld a1, %lo(.LCPI21_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v16, v16, a0 
; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 +; RV64-NEXT: vand.vx v16, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v16, v8 +; RV64-NEXT: lui a0, %hi(.LCPI21_2) +; RV64-NEXT: ld a0, %lo(.LCPI21_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI21_3) +; RV64-NEXT: ld a1, %lo(.LCPI21_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3637,47 +3549,25 @@ ; RV64-NEXT: vsrl.vx v9, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: lui a0, %hi(.LCPI40_0) +; RV64-NEXT: ld a0, %lo(.LCPI40_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI40_1) +; RV64-NEXT: ld a1, %lo(.LCPI40_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 +; RV64-NEXT: vand.vx v9, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v9, v8 +; RV64-NEXT: lui a0, %hi(.LCPI40_2) +; RV64-NEXT: ld a0, %lo(.LCPI40_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI40_3) +; RV64-NEXT: ld a1, %lo(.LCPI40_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3762,47 +3652,25 @@ ; RV64-NEXT: vsrl.vx v10, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: lui a0, %hi(.LCPI41_0) +; RV64-NEXT: ld a0, %lo(.LCPI41_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI41_1) +; RV64-NEXT: ld a1, %lo(.LCPI41_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; 
RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 +; RV64-NEXT: vand.vx v10, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v10, v8 +; RV64-NEXT: lui a0, %hi(.LCPI41_2) +; RV64-NEXT: ld a0, %lo(.LCPI41_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI41_3) +; RV64-NEXT: ld a1, %lo(.LCPI41_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3887,47 +3755,25 @@ ; RV64-NEXT: vsrl.vx v12, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: lui a0, %hi(.LCPI42_0) +; RV64-NEXT: ld a0, %lo(.LCPI42_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI42_1) +; RV64-NEXT: ld a1, %lo(.LCPI42_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 +; RV64-NEXT: vand.vx v12, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v12, v8 +; RV64-NEXT: lui a0, %hi(.LCPI42_2) +; RV64-NEXT: ld a0, %lo(.LCPI42_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI42_3) +; RV64-NEXT: ld a1, %lo(.LCPI42_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -4012,47 +3858,25 @@ ; RV64-NEXT: vsrl.vx v16, v8, a0 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vxor.vi v8, v8, -1 +; RV64-NEXT: lui a0, %hi(.LCPI43_0) +; RV64-NEXT: ld a0, %lo(.LCPI43_0)(a0) +; 
RV64-NEXT: lui a1, %hi(.LCPI43_1) +; RV64-NEXT: ld a1, %lo(.LCPI43_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 +; RV64-NEXT: vand.vx v16, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v16, v8 +; RV64-NEXT: lui a0, %hi(.LCPI43_2) +; RV64-NEXT: ld a0, %lo(.LCPI43_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI43_3) +; RV64-NEXT: ld a1, %lo(.LCPI43_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll @@ -798,47 +798,25 @@ ; RV64-LABEL: ctpop_nxv1i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV64-NEXT: lui a0, %hi(.LCPI18_0) +; RV64-NEXT: ld a0, %lo(.LCPI18_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI18_1) +; RV64-NEXT: ld a1, %lo(.LCPI18_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 +; RV64-NEXT: vand.vx v9, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v9, v8 +; RV64-NEXT: lui a0, %hi(.LCPI18_2) +; RV64-NEXT: ld a0, %lo(.LCPI18_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI18_3) +; RV64-NEXT: ld a1, %lo(.LCPI18_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: 
slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -896,47 +874,25 @@ ; RV64-LABEL: ctpop_nxv2i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV64-NEXT: lui a0, %hi(.LCPI19_0) +; RV64-NEXT: ld a0, %lo(.LCPI19_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI19_1) +; RV64-NEXT: ld a1, %lo(.LCPI19_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 +; RV64-NEXT: vand.vx v10, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v10, v8 +; RV64-NEXT: lui a0, %hi(.LCPI19_2) +; RV64-NEXT: ld a0, %lo(.LCPI19_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI19_3) +; RV64-NEXT: ld a1, %lo(.LCPI19_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -994,47 +950,25 @@ ; RV64-LABEL: ctpop_nxv4i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV64-NEXT: lui a0, %hi(.LCPI20_0) +; RV64-NEXT: ld a0, %lo(.LCPI20_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI20_1) +; RV64-NEXT: ld a1, %lo(.LCPI20_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 +; RV64-NEXT: vand.vx v12, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v12, v8 +; RV64-NEXT: lui a0, %hi(.LCPI20_2) +; RV64-NEXT: ld a0, %lo(.LCPI20_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI20_3) +; RV64-NEXT: ld a1, %lo(.LCPI20_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; 
RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1092,47 +1026,25 @@ ; RV64-LABEL: ctpop_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV64-NEXT: lui a0, %hi(.LCPI21_0) +; RV64-NEXT: ld a0, %lo(.LCPI21_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI21_1) +; RV64-NEXT: ld a1, %lo(.LCPI21_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 +; RV64-NEXT: vand.vx v16, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v16, v8 +; RV64-NEXT: lui a0, %hi(.LCPI21_2) +; RV64-NEXT: ld a0, %lo(.LCPI21_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI21_3) +; RV64-NEXT: ld a1, %lo(.LCPI21_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll @@ -1557,47 +1557,25 @@ ; RV64-NEXT: vsub.vx v9, v8, a0 ; RV64-NEXT: vxor.vi v8, v8, -1 ; RV64-NEXT: vand.vv v8, v8, v9 +; RV64-NEXT: lui a0, %hi(.LCPI18_0) +; RV64-NEXT: ld a0, %lo(.LCPI18_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI18_1) +; RV64-NEXT: ld a1, %lo(.LCPI18_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 +; RV64-NEXT: vand.vx v9, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v9, v8 +; RV64-NEXT: lui a0, %hi(.LCPI18_2) +; 
RV64-NEXT: ld a0, %lo(.LCPI18_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI18_3) +; RV64-NEXT: ld a1, %lo(.LCPI18_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1663,47 +1641,25 @@ ; RV64-NEXT: vsub.vx v10, v8, a0 ; RV64-NEXT: vxor.vi v8, v8, -1 ; RV64-NEXT: vand.vv v8, v8, v10 +; RV64-NEXT: lui a0, %hi(.LCPI19_0) +; RV64-NEXT: ld a0, %lo(.LCPI19_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI19_1) +; RV64-NEXT: ld a1, %lo(.LCPI19_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 +; RV64-NEXT: vand.vx v10, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v10, v8 +; RV64-NEXT: lui a0, %hi(.LCPI19_2) +; RV64-NEXT: ld a0, %lo(.LCPI19_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI19_3) +; RV64-NEXT: ld a1, %lo(.LCPI19_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1769,47 +1725,25 @@ ; RV64-NEXT: vsub.vx v12, v8, a0 ; RV64-NEXT: vxor.vi v8, v8, -1 ; RV64-NEXT: vand.vv v8, v8, v12 +; RV64-NEXT: lui a0, %hi(.LCPI20_0) +; RV64-NEXT: ld a0, %lo(.LCPI20_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI20_1) +; RV64-NEXT: ld a1, %lo(.LCPI20_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 +; 
RV64-NEXT: vand.vx v12, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v12, v8 +; RV64-NEXT: lui a0, %hi(.LCPI20_2) +; RV64-NEXT: ld a0, %lo(.LCPI20_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI20_3) +; RV64-NEXT: ld a1, %lo(.LCPI20_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1875,47 +1809,25 @@ ; RV64-NEXT: vsub.vx v16, v8, a0 ; RV64-NEXT: vxor.vi v8, v8, -1 ; RV64-NEXT: vand.vv v8, v8, v16 +; RV64-NEXT: lui a0, %hi(.LCPI21_0) +; RV64-NEXT: ld a0, %lo(.LCPI21_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI21_1) +; RV64-NEXT: ld a1, %lo(.LCPI21_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 +; RV64-NEXT: vand.vx v16, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v16, v8 +; RV64-NEXT: lui a0, %hi(.LCPI21_2) +; RV64-NEXT: ld a0, %lo(.LCPI21_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI21_3) +; RV64-NEXT: ld a1, %lo(.LCPI21_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3361,47 +3273,25 @@ ; RV64-NEXT: vsub.vx v9, v8, a0 ; RV64-NEXT: vxor.vi v8, v8, -1 ; RV64-NEXT: vand.vv v8, v8, v9 +; RV64-NEXT: lui a0, %hi(.LCPI40_0) +; RV64-NEXT: ld a0, %lo(.LCPI40_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI40_1) +; RV64-NEXT: ld a1, %lo(.LCPI40_1)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 
12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v9, v8, a0 +; RV64-NEXT: vand.vx v9, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v9, v8 +; RV64-NEXT: lui a0, %hi(.LCPI40_2) +; RV64-NEXT: ld a0, %lo(.LCPI40_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI40_3) +; RV64-NEXT: ld a1, %lo(.LCPI40_3)(a1) ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3466,47 +3356,25 @@ ; RV64-NEXT: vsub.vx v10, v8, a0 ; RV64-NEXT: vxor.vi v8, v8, -1 ; RV64-NEXT: vand.vv v8, v8, v10 +; RV64-NEXT: lui a0, %hi(.LCPI41_0) +; RV64-NEXT: ld a0, %lo(.LCPI41_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI41_1) +; RV64-NEXT: ld a1, %lo(.LCPI41_1)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v10, v8, a0 +; RV64-NEXT: vand.vx v10, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v10, v8 +; RV64-NEXT: lui a0, %hi(.LCPI41_2) +; RV64-NEXT: ld a0, %lo(.LCPI41_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI41_3) +; RV64-NEXT: ld a1, %lo(.LCPI41_3)(a1) ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3571,47 +3439,25 @@ ; RV64-NEXT: vsub.vx v12, v8, a0 ; RV64-NEXT: vxor.vi v8, v8, -1 ; RV64-NEXT: vand.vv v8, v8, v12 +; RV64-NEXT: lui a0, %hi(.LCPI42_0) +; RV64-NEXT: ld a0, %lo(.LCPI42_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI42_1) +; RV64-NEXT: ld a1, %lo(.LCPI42_1)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli 
a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v12, v8, a0 +; RV64-NEXT: vand.vx v12, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v12, v8 +; RV64-NEXT: lui a0, %hi(.LCPI42_2) +; RV64-NEXT: ld a0, %lo(.LCPI42_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI42_3) +; RV64-NEXT: ld a1, %lo(.LCPI42_3)(a1) ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3676,47 +3522,25 @@ ; RV64-NEXT: vsub.vx v16, v8, a0 ; RV64-NEXT: vxor.vi v8, v8, -1 ; RV64-NEXT: vand.vv v8, v8, v16 +; RV64-NEXT: lui a0, %hi(.LCPI43_0) +; RV64-NEXT: ld a0, %lo(.LCPI43_0)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI43_1) +; RV64-NEXT: ld a1, %lo(.LCPI43_1)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: lui a0, 21845 -; RV64-NEXT: addiw a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1365 ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: lui a0, 13107 -; RV64-NEXT: addiw a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 819 -; RV64-NEXT: vand.vx v16, v8, a0 +; RV64-NEXT: vand.vx v16, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vadd.vv v8, v16, v8 +; RV64-NEXT: lui a0, %hi(.LCPI43_2) +; RV64-NEXT: ld a0, %lo(.LCPI43_2)(a0) +; RV64-NEXT: lui a1, %hi(.LCPI43_3) +; RV64-NEXT: ld a1, %lo(.LCPI43_3)(a1) ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: lui a0, 3855 -; RV64-NEXT: addiw a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 241 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: lui a0, 4112 -; RV64-NEXT: addiw a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: addi a0, a0, 257 -; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -422,43 +422,25 @@ ; LMULMAX2-RV64-NEXT: slli a1, a3, 48 ; 
LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vor.vv v8, v11, v8 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI2_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI2_0)(a1) ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX2-RV64-NEXT: lui a1, 3855 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI2_1) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI2_1)(a1) ; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 4 ; LMULMAX2-RV64-NEXT: vor.vv v8, v9, v8 ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX2-RV64-NEXT: lui a1, 13107 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 ; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI2_2) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI2_2)(a1) ; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX2-RV64-NEXT: vor.vv v8, v9, v8 ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 21845 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v8 @@ -583,43 +565,25 @@ ; LMULMAX1-RV64-NEXT: slli a1, a3, 48 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX1-RV64-NEXT: vor.vv v8, v11, v8 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI2_0) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI2_0)(a1) ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV64-NEXT: lui a1, 3855 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -241 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI2_1) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI2_1)(a1) ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 4 ; LMULMAX1-RV64-NEXT: vor.vv v8, v9, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV64-NEXT: lui a1, 13107 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI2_2) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI2_2)(a1) ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: vor.vv v8, v9, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; 
LMULMAX1-RV64-NEXT: lui a1, 21845 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8 @@ -1148,43 +1112,25 @@ ; LMULMAX2-RV64-NEXT: slli a1, a3, 48 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vor.vv v8, v14, v8 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI5_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI5_0)(a1) ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV64-NEXT: lui a1, 3855 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI5_1) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI5_1)(a1) ; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 4 ; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX2-RV64-NEXT: lui a1, 13107 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI5_2) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI5_2)(a1) ; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX2-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 21845 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v8 @@ -1325,70 +1271,52 @@ ; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, t0 ; LMULMAX1-RV64-NEXT: li t1, 40 ; LMULMAX1-RV64-NEXT: vsrl.vx v11, v9, t1 -; LMULMAX1-RV64-NEXT: lui a1, 16 -; LMULMAX1-RV64-NEXT: addiw t2, a1, -256 +; LMULMAX1-RV64-NEXT: lui a4, 16 +; LMULMAX1-RV64-NEXT: addiw t2, a4, -256 ; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t2 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV64-NEXT: lui a6, 4080 ; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a6 ; LMULMAX1-RV64-NEXT: vsrl.vi v12, v9, 8 -; LMULMAX1-RV64-NEXT: li a3, 255 -; LMULMAX1-RV64-NEXT: slli t3, a3, 24 +; LMULMAX1-RV64-NEXT: li a5, 255 +; LMULMAX1-RV64-NEXT: slli t3, a5, 24 ; LMULMAX1-RV64-NEXT: vand.vx v12, v12, t3 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsll.vi v11, v9, 8 -; LMULMAX1-RV64-NEXT: slli t4, a3, 32 +; LMULMAX1-RV64-NEXT: slli t4, a5, 32 ; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t4 ; LMULMAX1-RV64-NEXT: vsll.vi v12, v9, 24 -; LMULMAX1-RV64-NEXT: slli a2, a3, 40 -; LMULMAX1-RV64-NEXT: 
vand.vx v12, v12, a2 +; LMULMAX1-RV64-NEXT: slli a3, a5, 40 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a3 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV64-NEXT: vsll.vx v12, v9, t0 ; LMULMAX1-RV64-NEXT: vsll.vx v9, v9, t1 -; LMULMAX1-RV64-NEXT: slli a3, a3, 48 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3 +; LMULMAX1-RV64-NEXT: slli a5, a5, 48 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 ; LMULMAX1-RV64-NEXT: vor.vv v9, v12, v9 +; LMULMAX1-RV64-NEXT: lui a4, %hi(.LCPI5_0) +; LMULMAX1-RV64-NEXT: ld a4, %lo(.LCPI5_0)(a4) ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: lui a4, 3855 -; LMULMAX1-RV64-NEXT: addiw a4, a4, 241 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, -241 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, 241 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, -241 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI5_1) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI5_1)(a1) ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 4 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV64-NEXT: lui a5, 13107 -; LMULMAX1-RV64-NEXT: addiw a5, a5, 819 -; LMULMAX1-RV64-NEXT: slli a5, a5, 12 -; LMULMAX1-RV64-NEXT: addi a5, a5, 819 -; LMULMAX1-RV64-NEXT: slli a5, a5, 12 -; LMULMAX1-RV64-NEXT: addi a5, a5, 819 -; LMULMAX1-RV64-NEXT: slli a5, a5, 12 -; LMULMAX1-RV64-NEXT: addi a5, a5, 819 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a5 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a1 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI5_2) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI5_2)(a2) ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 2 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: lui a1, 21845 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a2 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v9 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, t0 @@ -1404,11 +1332,11 @@ ; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 8 ; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t4 ; LMULMAX1-RV64-NEXT: vsll.vi v12, v8, 24 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a2 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a3 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV64-NEXT: vsll.vx v12, v8, t0 ; LMULMAX1-RV64-NEXT: vsll.vx v8, v8, t1 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 ; LMULMAX1-RV64-NEXT: vor.vv v8, v12, v8 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 @@ -1418,13 +1346,13 @@ ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 4 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a5 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a1 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; 
LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -658,47 +658,25 @@ ; LMULMAX2-RV64-NEXT: vsrl.vx v9, v8, a1 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v9 ; LMULMAX2-RV64-NEXT: vxor.vi v8, v8, -1 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI3_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI3_1) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 21845 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: lui a1, 13107 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a2 ; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v9, v8 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI3_2) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI3_3) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: lui a1, 3855 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, 4112 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: li a1, 56 ; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) @@ -779,47 +757,25 @@ ; LMULMAX1-RV64-NEXT: vsrl.vx v9, v8, a1 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v9 ; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI3_0) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI3_1) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: lui a1, 21845 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 
1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 13107 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1 +; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI3_2) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI3_3) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 3855 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -241 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: lui a1, 4112 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX1-RV64-NEXT: slli a1, a1, 16 -; LMULMAX1-RV64-NEXT: addi a1, a1, 257 -; LMULMAX1-RV64-NEXT: slli a1, a1, 16 -; LMULMAX1-RV64-NEXT: addi a1, a1, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX1-RV64-NEXT: li a1, 56 ; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) @@ -900,47 +856,25 @@ ; LMULMAX8-RV64-NEXT: vsrl.vx v9, v8, a1 ; LMULMAX8-RV64-NEXT: vor.vv v8, v8, v9 ; LMULMAX8-RV64-NEXT: vxor.vi v8, v8, -1 +; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI3_0) +; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI3_1) +; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX8-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX8-RV64-NEXT: lui a1, 21845 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX8-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX8-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX8-RV64-NEXT: lui a1, 13107 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: vand.vx v9, v8, a1 +; LMULMAX8-RV64-NEXT: vand.vx v9, v8, a2 ; LMULMAX8-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX8-RV64-NEXT: vadd.vv v8, v9, v8 +; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI3_2) +; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) +; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI3_3) +; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX8-RV64-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX8-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX8-RV64-NEXT: lui a1, 3855 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, -241 -; 
LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, -241 ; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX8-RV64-NEXT: lui a1, 4112 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX8-RV64-NEXT: slli a1, a1, 16 -; LMULMAX8-RV64-NEXT: addi a1, a1, 257 -; LMULMAX8-RV64-NEXT: slli a1, a1, 16 -; LMULMAX8-RV64-NEXT: addi a1, a1, 257 -; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX8-RV64-NEXT: li a1, 56 ; LMULMAX8-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) @@ -1687,47 +1621,25 @@ ; LMULMAX2-RV64-NEXT: vsrl.vx v10, v8, a1 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vxor.vi v8, v8, -1 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI7_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI7_0)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI7_1) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI7_1)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 21845 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 13107 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a2 ; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI7_2) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI7_2)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI7_3) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 3855 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, 4112 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: li a1, 56 ; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) @@ -1821,8 +1733,8 @@ ; LMULMAX1-RV64-LABEL: ctlz_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: addi a7, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v8, (a7) +; LMULMAX1-RV64-NEXT: addi a1, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v8, (a1) ; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 @@ -1838,49 +1750,27 @@ ; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a6 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, 
v10 ; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 +; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI7_0) +; LMULMAX1-RV64-NEXT: ld a3, %lo(.LCPI7_0)(a3) +; LMULMAX1-RV64-NEXT: lui a4, %hi(.LCPI7_1) +; LMULMAX1-RV64-NEXT: ld a4, %lo(.LCPI7_1)(a4) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a3, 21845 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 1365 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 1365 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 1365 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 1365 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 ; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a4, 13107 -; LMULMAX1-RV64-NEXT: addiw a4, a4, 819 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, 819 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, 819 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, 819 ; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a4 ; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX1-RV64-NEXT: lui a5, %hi(.LCPI7_2) +; LMULMAX1-RV64-NEXT: ld a5, %lo(.LCPI7_2)(a5) +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI7_3) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a5, 3855 -; LMULMAX1-RV64-NEXT: addiw a5, a5, 241 -; LMULMAX1-RV64-NEXT: slli a5, a5, 12 -; LMULMAX1-RV64-NEXT: addi a5, a5, -241 -; LMULMAX1-RV64-NEXT: slli a5, a5, 12 -; LMULMAX1-RV64-NEXT: addi a5, a5, 241 -; LMULMAX1-RV64-NEXT: slli a5, a5, 12 -; LMULMAX1-RV64-NEXT: addi a5, a5, -241 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: lui a2, 4112 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 257 -; LMULMAX1-RV64-NEXT: slli a2, a2, 16 -; LMULMAX1-RV64-NEXT: addi a2, a2, 257 -; LMULMAX1-RV64-NEXT: slli a2, a2, 16 -; LMULMAX1-RV64-NEXT: addi a2, a2, 257 ; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 -; LMULMAX1-RV64-NEXT: li a1, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: li a7, 56 +; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a7 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2 @@ -1905,9 +1795,9 @@ ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 ; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a2 -; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a1 +; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a7 ; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v8, (a7) +; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret ; ; LMULMAX8-RV32-LABEL: ctlz_v4i64: @@ -1985,47 +1875,25 @@ ; LMULMAX8-RV64-NEXT: vsrl.vx v10, v8, a1 ; LMULMAX8-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX8-RV64-NEXT: vxor.vi v8, v8, -1 +; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI7_0) +; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI7_0)(a1) +; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI7_1) +; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI7_1)(a2) ; LMULMAX8-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX8-RV64-NEXT: lui a1, 21845 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX8-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX8-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX8-RV64-NEXT: lui a1, 
13107 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: vand.vx v10, v8, a1 +; LMULMAX8-RV64-NEXT: vand.vx v10, v8, a2 ; LMULMAX8-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX8-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI7_2) +; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI7_2)(a1) +; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI7_3) +; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) ; LMULMAX8-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX8-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX8-RV64-NEXT: lui a1, 3855 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, -241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, -241 ; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX8-RV64-NEXT: lui a1, 4112 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX8-RV64-NEXT: slli a1, a1, 16 -; LMULMAX8-RV64-NEXT: addi a1, a1, 257 -; LMULMAX8-RV64-NEXT: slli a1, a1, 16 -; LMULMAX8-RV64-NEXT: addi a1, a1, 257 -; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX8-RV64-NEXT: li a1, 56 ; LMULMAX8-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll @@ -304,47 +304,25 @@ ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI3_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI3_1) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 21845 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: lui a1, 13107 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a2 ; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v9, v8 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI3_2) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI3_3) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: lui a1, 3855 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; 
LMULMAX2-RV64-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, 4112 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: li a1, 56 ; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) @@ -394,47 +372,25 @@ ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI3_0) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI3_1) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: lui a1, 21845 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 13107 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1 +; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI3_2) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI3_3) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 3855 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -241 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: lui a1, 4112 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX1-RV64-NEXT: slli a1, a1, 16 -; LMULMAX1-RV64-NEXT: addi a1, a1, 257 -; LMULMAX1-RV64-NEXT: slli a1, a1, 16 -; LMULMAX1-RV64-NEXT: addi a1, a1, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX1-RV64-NEXT: li a1, 56 ; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) @@ -840,47 +796,25 @@ ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI7_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI7_0)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI7_1) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI7_1)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 21845 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; 
LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 13107 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a2 ; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI7_2) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI7_2)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI7_3) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 3855 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, 4112 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: li a1, 56 ; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) @@ -944,52 +878,13 @@ ; LMULMAX1-RV64-LABEL: ctpop_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: addi a6, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v8, (a6) -; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a2, 21845 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, 1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, 1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a3, 13107 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 819 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 819 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 819 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 819 -; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a3 -; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a4, 3855 -; LMULMAX1-RV64-NEXT: addiw a4, a4, 241 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, -241 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, 241 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, -241 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 -; 
LMULMAX1-RV64-NEXT: lui a5, 4112 -; LMULMAX1-RV64-NEXT: addiw a5, a5, 257 -; LMULMAX1-RV64-NEXT: slli a5, a5, 16 -; LMULMAX1-RV64-NEXT: addi a5, a5, 257 -; LMULMAX1-RV64-NEXT: slli a5, a5, 16 -; LMULMAX1-RV64-NEXT: addi a5, a5, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: li a1, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) +; LMULMAX1-RV64-NEXT: addi a1, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v9, (a1) +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI7_0) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI7_0)(a2) +; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI7_1) +; LMULMAX1-RV64-NEXT: ld a3, %lo(.LCPI7_1)(a3) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v10 @@ -997,13 +892,30 @@ ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 2 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 +; LMULMAX1-RV64-NEXT: lui a4, %hi(.LCPI7_2) +; LMULMAX1-RV64-NEXT: ld a4, %lo(.LCPI7_2)(a4) +; LMULMAX1-RV64-NEXT: lui a5, %hi(.LCPI7_3) +; LMULMAX1-RV64-NEXT: ld a5, %lo(.LCPI7_3)(a5) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 ; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v8, (a6) +; LMULMAX1-RV64-NEXT: li a6, 56 +; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a6 +; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 +; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 +; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a3 +; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 +; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 +; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a5 +; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a6 +; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vse64.v v9, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -598,47 +598,25 @@ ; LMULMAX2-RV64-NEXT: vsub.vx v9, v8, a1 ; LMULMAX2-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX2-RV64-NEXT: vand.vv v8, v8, v9 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI3_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI3_1) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 21845 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: lui a1, 13107 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v9, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v9, v8, 
a2 ; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v9, v8 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI3_2) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI3_3) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX2-RV64-NEXT: lui a1, 3855 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, 4112 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: li a1, 56 ; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) @@ -699,47 +677,25 @@ ; LMULMAX1-RV64-NEXT: vsub.vx v9, v8, a1 ; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v9 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI3_0) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI3_1) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV64-NEXT: lui a1, 21845 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 13107 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 819 -; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a1 +; LMULMAX1-RV64-NEXT: vand.vx v9, v8, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI3_2) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI3_3) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: lui a1, 3855 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, 241 -; LMULMAX1-RV64-NEXT: slli a1, a1, 12 -; LMULMAX1-RV64-NEXT: addi a1, a1, -241 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX1-RV64-NEXT: lui a1, 4112 -; LMULMAX1-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX1-RV64-NEXT: slli a1, a1, 16 -; LMULMAX1-RV64-NEXT: addi a1, a1, 257 -; LMULMAX1-RV64-NEXT: slli a1, a1, 16 -; LMULMAX1-RV64-NEXT: addi a1, a1, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX1-RV64-NEXT: li a1, 56 ; 
LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) @@ -800,47 +756,25 @@ ; LMULMAX8-RV64-NEXT: vsub.vx v9, v8, a1 ; LMULMAX8-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX8-RV64-NEXT: vand.vv v8, v8, v9 +; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI3_0) +; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) +; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI3_1) +; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI3_1)(a2) ; LMULMAX8-RV64-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX8-RV64-NEXT: lui a1, 21845 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX8-RV64-NEXT: vand.vx v9, v9, a1 ; LMULMAX8-RV64-NEXT: vsub.vv v8, v8, v9 -; LMULMAX8-RV64-NEXT: lui a1, 13107 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: vand.vx v9, v8, a1 +; LMULMAX8-RV64-NEXT: vand.vx v9, v8, a2 ; LMULMAX8-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX8-RV64-NEXT: vadd.vv v8, v9, v8 +; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI3_2) +; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI3_2)(a1) +; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI3_3) +; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI3_3)(a2) ; LMULMAX8-RV64-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX8-RV64-NEXT: vadd.vv v8, v8, v9 -; LMULMAX8-RV64-NEXT: lui a1, 3855 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, -241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, -241 ; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX8-RV64-NEXT: lui a1, 4112 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX8-RV64-NEXT: slli a1, a1, 16 -; LMULMAX8-RV64-NEXT: addi a1, a1, 257 -; LMULMAX8-RV64-NEXT: slli a1, a1, 16 -; LMULMAX8-RV64-NEXT: addi a1, a1, 257 -; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX8-RV64-NEXT: li a1, 56 ; LMULMAX8-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) @@ -1483,47 +1417,25 @@ ; LMULMAX2-RV64-NEXT: vsub.vx v10, v8, a1 ; LMULMAX2-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX2-RV64-NEXT: vand.vv v8, v8, v10 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI7_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI7_0)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI7_1) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI7_1)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV64-NEXT: lui a1, 21845 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 13107 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi 
a1, a1, 819 -; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v10, v8, a2 ; LMULMAX2-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI7_2) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI7_2)(a1) +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI7_3) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, 3855 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 241 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -241 ; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: lui a1, 4112 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: slli a1, a1, 16 -; LMULMAX2-RV64-NEXT: addi a1, a1, 257 -; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX2-RV64-NEXT: li a1, 56 ; LMULMAX2-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) @@ -1597,56 +1509,34 @@ ; LMULMAX1-RV64-LABEL: cttz_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: addi a7, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v8, (a7) +; LMULMAX1-RV64-NEXT: addi a1, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v8, (a1) ; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) ; LMULMAX1-RV64-NEXT: li a6, 1 ; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a6 ; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v10 +; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI7_0) +; LMULMAX1-RV64-NEXT: ld a3, %lo(.LCPI7_0)(a3) +; LMULMAX1-RV64-NEXT: lui a4, %hi(.LCPI7_1) +; LMULMAX1-RV64-NEXT: ld a4, %lo(.LCPI7_1)(a4) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a3, 21845 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 1365 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 1365 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 1365 -; LMULMAX1-RV64-NEXT: slli a3, a3, 12 -; LMULMAX1-RV64-NEXT: addi a3, a3, 1365 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 ; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a4, 13107 -; LMULMAX1-RV64-NEXT: addiw a4, a4, 819 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, 819 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, 819 -; LMULMAX1-RV64-NEXT: slli a4, a4, 12 -; LMULMAX1-RV64-NEXT: addi a4, a4, 819 ; LMULMAX1-RV64-NEXT: vand.vx v10, v8, a4 ; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX1-RV64-NEXT: lui a5, %hi(.LCPI7_2) +; LMULMAX1-RV64-NEXT: ld a5, %lo(.LCPI7_2)(a5) +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI7_3) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: lui a5, 3855 -; LMULMAX1-RV64-NEXT: addiw a5, a5, 241 -; LMULMAX1-RV64-NEXT: slli a5, a5, 12 -; LMULMAX1-RV64-NEXT: addi a5, a5, -241 -; LMULMAX1-RV64-NEXT: slli a5, a5, 12 -; LMULMAX1-RV64-NEXT: addi a5, a5, 241 -; LMULMAX1-RV64-NEXT: slli a5, a5, 12 -; LMULMAX1-RV64-NEXT: addi a5, a5, -241 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 -; 
LMULMAX1-RV64-NEXT: lui a2, 4112 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 257 -; LMULMAX1-RV64-NEXT: slli a2, a2, 16 -; LMULMAX1-RV64-NEXT: addi a2, a2, 257 -; LMULMAX1-RV64-NEXT: slli a2, a2, 16 -; LMULMAX1-RV64-NEXT: addi a2, a2, 257 ; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 -; LMULMAX1-RV64-NEXT: li a1, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: li a7, 56 +; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a7 ; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a6 ; LMULMAX1-RV64-NEXT: vxor.vi v9, v9, -1 ; LMULMAX1-RV64-NEXT: vand.vv v9, v9, v10 @@ -1661,9 +1551,9 @@ ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 ; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a2 -; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a1 +; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a7 ; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v8, (a7) +; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret ; ; LMULMAX8-RV32-LABEL: cttz_v4i64: @@ -1721,47 +1611,25 @@ ; LMULMAX8-RV64-NEXT: vsub.vx v10, v8, a1 ; LMULMAX8-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX8-RV64-NEXT: vand.vv v8, v8, v10 +; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI7_0) +; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI7_0)(a1) +; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI7_1) +; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI7_1)(a2) ; LMULMAX8-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX8-RV64-NEXT: lui a1, 21845 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 1365 ; LMULMAX8-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX8-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX8-RV64-NEXT: lui a1, 13107 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 819 -; LMULMAX8-RV64-NEXT: vand.vx v10, v8, a1 +; LMULMAX8-RV64-NEXT: vand.vx v10, v8, a2 ; LMULMAX8-RV64-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a2 ; LMULMAX8-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX8-RV64-NEXT: lui a1, %hi(.LCPI7_2) +; LMULMAX8-RV64-NEXT: ld a1, %lo(.LCPI7_2)(a1) +; LMULMAX8-RV64-NEXT: lui a2, %hi(.LCPI7_3) +; LMULMAX8-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) ; LMULMAX8-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX8-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX8-RV64-NEXT: lui a1, 3855 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, -241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, 241 -; LMULMAX8-RV64-NEXT: slli a1, a1, 12 -; LMULMAX8-RV64-NEXT: addi a1, a1, -241 ; LMULMAX8-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX8-RV64-NEXT: lui a1, 4112 -; LMULMAX8-RV64-NEXT: addiw a1, a1, 257 -; LMULMAX8-RV64-NEXT: slli a1, a1, 16 -; LMULMAX8-RV64-NEXT: addi a1, a1, 257 -; LMULMAX8-RV64-NEXT: slli a1, a1, 16 -; LMULMAX8-RV64-NEXT: addi a1, a1, 257 -; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a1 +; LMULMAX8-RV64-NEXT: vmul.vx v8, v8, a2 ; LMULMAX8-RV64-NEXT: li a1, 56 ; LMULMAX8-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -376,16 +376,10 @@ ; ; RV64-LABEL: buildvec_dominant0_v2i32: ; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI20_0) +; RV64-NEXT: ld a1, %lo(.LCPI20_0)(a1) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vmv.v.i v8, -1 -; RV64-NEXT: lui a1, 3641 -; RV64-NEXT: addiw a1, a1, -455 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -455 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -455 -; RV64-NEXT: slli a1, a1, 13 -; RV64-NEXT: addi a1, a1, -910 ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; RV64-NEXT: vmv.s.x v8, a1 ; RV64-NEXT: vse64.v v8, (a0) @@ -465,12 +459,8 @@ ; ; RV64-LABEL: buildvec_seq_v16i8_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, 32880 -; RV64-NEXT: addiw a1, a1, 1541 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi a1, a1, 1027 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi a1, a1, 513 +; RV64-NEXT: lui a1, %hi(.LCPI24_0) +; RV64-NEXT: ld a1, %lo(.LCPI24_0)(a1) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vmv.v.x v8, a1 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu @@ -524,14 +514,10 @@ ; ; RV64-LABEL: buildvec_seq_v9i8: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 3 -; RV64-NEXT: sb a1, 8(a0) -; RV64-NEXT: lui a1, 4104 -; RV64-NEXT: addiw a1, a1, 385 -; RV64-NEXT: slli a1, a1, 17 -; RV64-NEXT: addi a1, a1, 259 -; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: addi a1, a1, 513 +; RV64-NEXT: lui a1, %hi(.LCPI26_0) +; RV64-NEXT: ld a1, %lo(.LCPI26_0)(a1) +; RV64-NEXT: li a2, 3 +; RV64-NEXT: sb a2, 8(a0) ; RV64-NEXT: sd a1, 0(a0) ; RV64-NEXT: ret store <9 x i8> , <9 x i8>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1032,29 +1032,17 @@ ; ; RV64-LABEL: mulhu_v2i64: ; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI55_0) +; RV64-NEXT: ld a1, %lo(.LCPI55_0)(a1) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: lui a1, 1035469 -; RV64-NEXT: addiw a1, a1, -819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -819 -; RV64-NEXT: vmv.v.x v9, a1 -; RV64-NEXT: lui a1, 1026731 -; RV64-NEXT: addiw a1, a1, -1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1365 +; RV64-NEXT: lui a2, %hi(.LCPI55_1) +; RV64-NEXT: ld a2, %lo(.LCPI55_1)(a2) +; RV64-NEXT: vmv.v.x v8, a1 +; RV64-NEXT: vle64.v v9, (a0) ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; RV64-NEXT: vmv.s.x v9, a1 +; RV64-NEXT: vmv.s.x v8, a2 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV64-NEXT: vmulhu.vv v8, v8, v9 +; RV64-NEXT: vmulhu.vv v8, v9, v8 ; RV64-NEXT: vid.v v9 ; RV64-NEXT: vadd.vi v9, v9, 1 ; RV64-NEXT: vsrl.vv v8, v8, v9 @@ -1184,16 +1172,10 @@ ; ; RV64-LABEL: mulhs_v4i32: ; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI58_0) +; RV64-NEXT: ld a1, %lo(.LCPI58_0)(a1) ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: lui a1, 13107 -; RV64-NEXT: addiw a1, a1, 819 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 973 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -819 -; RV64-NEXT: slli a1, a1, 13 -; 
RV64-NEXT: addi a1, a1, -1639 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vmv.v.x v9, a1 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu @@ -1245,25 +1227,20 @@ ; ; RV64-LABEL: mulhs_v2i64: ; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI59_0) +; RV64-NEXT: ld a1, %lo(.LCPI59_0)(a1) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: lui a1, 21845 -; RV64-NEXT: addiw a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a2, a1, 1365 -; RV64-NEXT: vmv.v.x v9, a2 -; RV64-NEXT: addi a1, a1, 1366 +; RV64-NEXT: lui a2, %hi(.LCPI59_1) +; RV64-NEXT: ld a2, %lo(.LCPI59_1)(a2) +; RV64-NEXT: vmv.v.x v8, a1 +; RV64-NEXT: vle64.v v9, (a0) ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; RV64-NEXT: vmv.s.x v9, a1 +; RV64-NEXT: vmv.s.x v8, a2 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV64-NEXT: vmulh.vv v9, v8, v9 +; RV64-NEXT: vmulh.vv v8, v9, v8 ; RV64-NEXT: vid.v v10 ; RV64-NEXT: vrsub.vi v11, v10, 0 -; RV64-NEXT: vmadd.vv v11, v8, v9 +; RV64-NEXT: vmadd.vv v11, v9, v8 ; RV64-NEXT: li a1, 63 ; RV64-NEXT: vsrl.vx v8, v11, a1 ; RV64-NEXT: vsra.vv v9, v11, v10 @@ -4366,51 +4343,27 @@ ; LMULMAX1-RV64-NEXT: slli a2, a2, 63 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; LMULMAX1-RV64-NEXT: vmv.s.x v10, a2 -; LMULMAX1-RV64-NEXT: lui a2, 1044935 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 455 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, 455 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, 455 -; LMULMAX1-RV64-NEXT: slli a2, a2, 13 -; LMULMAX1-RV64-NEXT: addi a2, a2, 911 +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI132_0) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI132_0)(a2) +; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI132_1) +; LMULMAX1-RV64-NEXT: ld a3, %lo(.LCPI132_1)(a3) ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vmv.v.x v11, a2 -; LMULMAX1-RV64-NEXT: lui a2, 4681 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 585 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, 585 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, 585 -; LMULMAX1-RV64-NEXT: slli a2, a2, 13 -; LMULMAX1-RV64-NEXT: addi a2, a2, 1171 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2 +; LMULMAX1-RV64-NEXT: vmv.s.x v11, a3 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vmulhu.vv v11, v9, v11 ; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v11 +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI132_2) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI132_2)(a2) ; LMULMAX1-RV64-NEXT: vmulhu.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vid.v v10 -; LMULMAX1-RV64-NEXT: vadd.vi v11, v10, 2 -; LMULMAX1-RV64-NEXT: vsrl.vv v9, v9, v11 -; LMULMAX1-RV64-NEXT: lui a2, 1035469 -; LMULMAX1-RV64-NEXT: addiw a2, a2, -819 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, -819 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, -819 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, -819 ; LMULMAX1-RV64-NEXT: vmv.v.x v11, a2 -; LMULMAX1-RV64-NEXT: lui a2, 1026731 -; LMULMAX1-RV64-NEXT: addiw a2, a2, -1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, -1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, -1365 -; LMULMAX1-RV64-NEXT: 
slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, -1365 +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI132_3) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI132_3)(a2) +; LMULMAX1-RV64-NEXT: vadd.vi v12, v10, 2 +; LMULMAX1-RV64-NEXT: vsrl.vv v9, v9, v12 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -4600,16 +4553,10 @@ ; ; LMULMAX2-RV64-LABEL: mulhs_v8i32: ; LMULMAX2-RV64: # %bb.0: +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI135_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI135_0)(a1) ; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64-NEXT: lui a1, 13107 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 973 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, -819 -; LMULMAX2-RV64-NEXT: slli a1, a1, 13 -; LMULMAX2-RV64-NEXT: addi a1, a1, -1639 ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV64-NEXT: vmv.v.x v10, a1 ; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu @@ -4716,27 +4663,22 @@ ; LMULMAX2-RV64-NEXT: li a1, 5 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI136_0) +; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI136_0)(a1) ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; LMULMAX2-RV64-NEXT: vmv.v.i v10, -1 -; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 0, v0 -; LMULMAX2-RV64-NEXT: lui a1, 21845 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV64-NEXT: slli a1, a1, 12 -; LMULMAX2-RV64-NEXT: addi a2, a1, 1365 -; LMULMAX2-RV64-NEXT: vmv.v.x v12, a2 -; LMULMAX2-RV64-NEXT: addi a1, a1, 1366 -; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v12, a1, v0 -; LMULMAX2-RV64-NEXT: vmulh.vv v12, v8, v12 -; LMULMAX2-RV64-NEXT: vmacc.vv v12, v8, v10 +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI136_1) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI136_1)(a2) +; LMULMAX2-RV64-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV64-NEXT: vmv.v.i v12, -1 +; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 0, v0 +; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a2, v0 +; LMULMAX2-RV64-NEXT: vmulh.vv v10, v8, v10 +; LMULMAX2-RV64-NEXT: vmacc.vv v10, v8, v12 ; LMULMAX2-RV64-NEXT: li a1, 63 -; LMULMAX2-RV64-NEXT: vsrl.vx v8, v12, a1 -; LMULMAX2-RV64-NEXT: vmv.v.i v10, 1 -; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 0, v0 -; LMULMAX2-RV64-NEXT: vsra.vv v10, v12, v10 +; LMULMAX2-RV64-NEXT: vsrl.vx v8, v10, a1 +; LMULMAX2-RV64-NEXT: vmv.v.i v12, 1 +; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 0, v0 +; LMULMAX2-RV64-NEXT: vsra.vv v10, v10, v12 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret @@ -4760,38 +4702,33 @@ ; ; LMULMAX1-RV64-LABEL: mulhs_v4i64: ; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI136_0) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI136_0)(a1) ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV64-NEXT: lui a2, 21845 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, 1365 -; LMULMAX1-RV64-NEXT: slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a2, a2, 1365 -; LMULMAX1-RV64-NEXT: 
slli a2, a2, 12 -; LMULMAX1-RV64-NEXT: addi a3, a2, 1365 -; LMULMAX1-RV64-NEXT: vmv.v.x v10, a3 -; LMULMAX1-RV64-NEXT: addi a2, a2, 1366 +; LMULMAX1-RV64-NEXT: vmv.v.x v9, a1 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI136_1) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI136_1)(a1) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-RV64-NEXT: vmv.s.x v10, a2 +; LMULMAX1-RV64-NEXT: vmv.s.x v9, a1 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: vmulh.vv v11, v9, v10 +; LMULMAX1-RV64-NEXT: vmulh.vv v11, v10, v9 ; LMULMAX1-RV64-NEXT: vid.v v12 ; LMULMAX1-RV64-NEXT: vrsub.vi v13, v12, 0 -; LMULMAX1-RV64-NEXT: vmacc.vv v11, v13, v9 -; LMULMAX1-RV64-NEXT: li a2, 63 -; LMULMAX1-RV64-NEXT: vsrl.vx v9, v11, a2 +; LMULMAX1-RV64-NEXT: vmacc.vv v11, v13, v10 +; LMULMAX1-RV64-NEXT: li a1, 63 +; LMULMAX1-RV64-NEXT: vsrl.vx v10, v11, a1 ; LMULMAX1-RV64-NEXT: vsra.vv v11, v11, v12 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v11, v9 -; LMULMAX1-RV64-NEXT: vmulh.vv v10, v8, v10 -; LMULMAX1-RV64-NEXT: vmacc.vv v10, v8, v13 -; LMULMAX1-RV64-NEXT: vsrl.vx v8, v10, a2 -; LMULMAX1-RV64-NEXT: vsra.vv v10, v10, v12 -; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX1-RV64-NEXT: vadd.vv v10, v11, v10 +; LMULMAX1-RV64-NEXT: vmulh.vv v9, v8, v9 +; LMULMAX1-RV64-NEXT: vmacc.vv v9, v8, v13 +; LMULMAX1-RV64-NEXT: vsrl.vx v8, v9, a1 +; LMULMAX1-RV64-NEXT: vsra.vv v9, v9, v12 +; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a1) +; LMULMAX1-RV64-NEXT: vse64.v v10, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = sdiv <4 x i64> %a, @@ -7405,14 +7342,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: lui a1, 1026731 -; RV64-NEXT: addiw a1, a1, -1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1365 +; RV64-NEXT: lui a1, %hi(.LCPI265_0) +; RV64-NEXT: ld a1, %lo(.LCPI265_0)(a1) ; RV64-NEXT: vmulhu.vx v8, v8, a1 ; RV64-NEXT: vsrl.vi v8, v8, 1 ; RV64-NEXT: vse64.v v8, (a0) @@ -7529,14 +7460,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: lui a1, 21845 -; RV64-NEXT: addiw a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, 1366 +; RV64-NEXT: lui a1, %hi(.LCPI269_0) +; RV64-NEXT: ld a1, %lo(.LCPI269_0)(a1) ; RV64-NEXT: vmulh.vx v8, v8, a1 ; RV64-NEXT: li a1, 63 ; RV64-NEXT: vsrl.vx v9, v8, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll @@ -469,14 +469,8 @@ ; ; RV64-LMULMAX4-LABEL: buildvec_mask_v64i1: ; RV64-LMULMAX4: # %bb.0: -; RV64-LMULMAX4-NEXT: lui a0, 1048429 -; RV64-LMULMAX4-NEXT: addiw a0, a0, 1735 -; RV64-LMULMAX4-NEXT: slli a0, a0, 13 -; RV64-LMULMAX4-NEXT: addi a0, a0, 1023 -; RV64-LMULMAX4-NEXT: slli a0, a0, 13 -; RV64-LMULMAX4-NEXT: addi a0, a0, -1189 -; RV64-LMULMAX4-NEXT: slli a0, a0, 17 -; RV64-LMULMAX4-NEXT: addi a0, a0, 1776 +; RV64-LMULMAX4-NEXT: lui a0, %hi(.LCPI19_0) +; RV64-LMULMAX4-NEXT: ld a0, 
%lo(.LCPI19_0)(a0) ; RV64-LMULMAX4-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; RV64-LMULMAX4-NEXT: vmv.s.x v0, a0 ; RV64-LMULMAX4-NEXT: ret @@ -496,14 +490,8 @@ ; ; RV64-LMULMAX8-LABEL: buildvec_mask_v64i1: ; RV64-LMULMAX8: # %bb.0: -; RV64-LMULMAX8-NEXT: lui a0, 1048429 -; RV64-LMULMAX8-NEXT: addiw a0, a0, 1735 -; RV64-LMULMAX8-NEXT: slli a0, a0, 13 -; RV64-LMULMAX8-NEXT: addi a0, a0, 1023 -; RV64-LMULMAX8-NEXT: slli a0, a0, 13 -; RV64-LMULMAX8-NEXT: addi a0, a0, -1189 -; RV64-LMULMAX8-NEXT: slli a0, a0, 17 -; RV64-LMULMAX8-NEXT: addi a0, a0, 1776 +; RV64-LMULMAX8-NEXT: lui a0, %hi(.LCPI19_0) +; RV64-LMULMAX8-NEXT: ld a0, %lo(.LCPI19_0)(a0) ; RV64-LMULMAX8-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; RV64-LMULMAX8-NEXT: vmv.s.x v0, a0 ; RV64-LMULMAX8-NEXT: ret @@ -613,23 +601,13 @@ ; ; RV64-LMULMAX4-LABEL: buildvec_mask_v128i1: ; RV64-LMULMAX4: # %bb.0: -; RV64-LMULMAX4-NEXT: lui a0, 841543 -; RV64-LMULMAX4-NEXT: addiw a0, a0, 511 -; RV64-LMULMAX4-NEXT: slli a0, a0, 14 -; RV64-LMULMAX4-NEXT: addi a0, a0, 859 -; RV64-LMULMAX4-NEXT: slli a0, a0, 17 -; RV64-LMULMAX4-NEXT: addi a0, a0, 1776 +; RV64-LMULMAX4-NEXT: lui a0, %hi(.LCPI20_0) +; RV64-LMULMAX4-NEXT: ld a0, %lo(.LCPI20_0)(a0) +; RV64-LMULMAX4-NEXT: lui a1, %hi(.LCPI20_1) +; RV64-LMULMAX4-NEXT: ld a1, %lo(.LCPI20_1)(a1) ; RV64-LMULMAX4-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV64-LMULMAX4-NEXT: vmv.s.x v8, a0 -; RV64-LMULMAX4-NEXT: lui a0, 1048429 -; RV64-LMULMAX4-NEXT: addiw a0, a0, 1735 -; RV64-LMULMAX4-NEXT: slli a0, a0, 13 -; RV64-LMULMAX4-NEXT: addi a0, a0, 1023 -; RV64-LMULMAX4-NEXT: slli a0, a0, 13 -; RV64-LMULMAX4-NEXT: addi a0, a0, -1189 -; RV64-LMULMAX4-NEXT: slli a0, a0, 17 -; RV64-LMULMAX4-NEXT: addi a0, a0, 1776 ; RV64-LMULMAX4-NEXT: vmv.s.x v0, a0 +; RV64-LMULMAX4-NEXT: vmv.s.x v8, a1 ; RV64-LMULMAX4-NEXT: ret ; ; RV32-LMULMAX8-LABEL: buildvec_mask_v128i1: @@ -659,23 +637,13 @@ ; ; RV64-LMULMAX8-LABEL: buildvec_mask_v128i1: ; RV64-LMULMAX8: # %bb.0: -; RV64-LMULMAX8-NEXT: lui a0, 841543 -; RV64-LMULMAX8-NEXT: addiw a0, a0, 511 -; RV64-LMULMAX8-NEXT: slli a0, a0, 14 -; RV64-LMULMAX8-NEXT: addi a0, a0, 859 -; RV64-LMULMAX8-NEXT: slli a0, a0, 17 -; RV64-LMULMAX8-NEXT: addi a0, a0, 1776 +; RV64-LMULMAX8-NEXT: lui a0, %hi(.LCPI20_0) +; RV64-LMULMAX8-NEXT: ld a0, %lo(.LCPI20_0)(a0) +; RV64-LMULMAX8-NEXT: lui a1, %hi(.LCPI20_1) +; RV64-LMULMAX8-NEXT: ld a1, %lo(.LCPI20_1)(a1) ; RV64-LMULMAX8-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-LMULMAX8-NEXT: vmv.s.x v8, a0 -; RV64-LMULMAX8-NEXT: lui a0, 1048429 -; RV64-LMULMAX8-NEXT: addiw a0, a0, 1735 -; RV64-LMULMAX8-NEXT: slli a0, a0, 13 -; RV64-LMULMAX8-NEXT: addi a0, a0, 1023 -; RV64-LMULMAX8-NEXT: slli a0, a0, 13 -; RV64-LMULMAX8-NEXT: addi a0, a0, -1189 -; RV64-LMULMAX8-NEXT: slli a0, a0, 17 -; RV64-LMULMAX8-NEXT: addi a0, a0, 1776 -; RV64-LMULMAX8-NEXT: vmv.s.x v0, a0 +; RV64-LMULMAX8-NEXT: vmv.s.x v0, a1 ; RV64-LMULMAX8-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; RV64-LMULMAX8-NEXT: vslideup.vi v0, v8, 1 ; RV64-LMULMAX8-NEXT: ret @@ -775,23 +743,13 @@ ; ; RV64-LMULMAX4-LABEL: buildvec_mask_optsize_v128i1: ; RV64-LMULMAX4: # %bb.0: -; RV64-LMULMAX4-NEXT: lui a0, 841543 -; RV64-LMULMAX4-NEXT: addiw a0, a0, 511 -; RV64-LMULMAX4-NEXT: slli a0, a0, 14 -; RV64-LMULMAX4-NEXT: addi a0, a0, 859 -; RV64-LMULMAX4-NEXT: slli a0, a0, 17 -; RV64-LMULMAX4-NEXT: addi a0, a0, 1776 +; RV64-LMULMAX4-NEXT: lui a0, %hi(.LCPI21_0) +; RV64-LMULMAX4-NEXT: ld a0, %lo(.LCPI21_0)(a0) +; RV64-LMULMAX4-NEXT: lui a1, %hi(.LCPI21_1) +; RV64-LMULMAX4-NEXT: ld a1, %lo(.LCPI21_1)(a1) ; RV64-LMULMAX4-NEXT: 
vsetivli zero, 1, e64, m1, ta, mu -; RV64-LMULMAX4-NEXT: vmv.s.x v8, a0 -; RV64-LMULMAX4-NEXT: lui a0, 1048429 -; RV64-LMULMAX4-NEXT: addiw a0, a0, 1735 -; RV64-LMULMAX4-NEXT: slli a0, a0, 13 -; RV64-LMULMAX4-NEXT: addi a0, a0, 1023 -; RV64-LMULMAX4-NEXT: slli a0, a0, 13 -; RV64-LMULMAX4-NEXT: addi a0, a0, -1189 -; RV64-LMULMAX4-NEXT: slli a0, a0, 17 -; RV64-LMULMAX4-NEXT: addi a0, a0, 1776 ; RV64-LMULMAX4-NEXT: vmv.s.x v0, a0 +; RV64-LMULMAX4-NEXT: vmv.s.x v8, a1 ; RV64-LMULMAX4-NEXT: ret ; ; RV32-LMULMAX8-LABEL: buildvec_mask_optsize_v128i1: diff --git a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll --- a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll @@ -27,16 +27,10 @@ ; RV64-1024-NEXT: vslideup.vi v8, v16, 0 ; RV64-1024-NEXT: vsetvli zero, a1, e16, m4, tu, mu ; RV64-1024-NEXT: vslideup.vx v8, v24, a3 +; RV64-1024-NEXT: lui a2, %hi(.LCPI0_0) +; RV64-1024-NEXT: ld a2, %lo(.LCPI0_0)(a2) ; RV64-1024-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; RV64-1024-NEXT: vrgather.vv v16, v0, v28 -; RV64-1024-NEXT: lui a2, 1026731 -; RV64-1024-NEXT: addiw a2, a2, -1365 -; RV64-1024-NEXT: slli a2, a2, 12 -; RV64-1024-NEXT: addi a2, a2, -1365 -; RV64-1024-NEXT: slli a2, a2, 12 -; RV64-1024-NEXT: addi a2, a2, -1365 -; RV64-1024-NEXT: slli a2, a2, 12 -; RV64-1024-NEXT: addi a2, a2, -1366 ; RV64-1024-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; RV64-1024-NEXT: vmv.s.x v20, a2 ; RV64-1024-NEXT: vsetivli zero, 2, e64, m1, tu, mu @@ -64,39 +58,33 @@ ; RV64-2048-NEXT: vmv2r.v v14, v8 ; RV64-2048-NEXT: vslideup.vi v14, v10, 0 ; RV64-2048-NEXT: vsetvli zero, a3, e16, m1, ta, mu -; RV64-2048-NEXT: vmv.v.i v16, 0 +; RV64-2048-NEXT: vmv.v.i v10, 0 ; RV64-2048-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; RV64-2048-NEXT: vslideup.vx v14, v16, a3 +; RV64-2048-NEXT: vslideup.vx v14, v10, a3 ; RV64-2048-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV64-2048-NEXT: vid.v v18 -; RV64-2048-NEXT: vsrl.vi v10, v18, 1 -; RV64-2048-NEXT: vrgather.vv v20, v14, v10 +; RV64-2048-NEXT: vid.v v16 +; RV64-2048-NEXT: vsrl.vi v18, v16, 1 +; RV64-2048-NEXT: vrgather.vv v20, v14, v18 ; RV64-2048-NEXT: vsetvli zero, a3, e16, m2, tu, mu ; RV64-2048-NEXT: vslideup.vi v8, v12, 0 ; RV64-2048-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; RV64-2048-NEXT: vslideup.vx v8, v16, a3 +; RV64-2048-NEXT: vslideup.vx v8, v10, a3 +; RV64-2048-NEXT: lui a2, %hi(.LCPI0_0) +; RV64-2048-NEXT: ld a2, %lo(.LCPI0_0)(a2) ; RV64-2048-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; RV64-2048-NEXT: vrgather.vv v12, v20, v18 -; RV64-2048-NEXT: lui a2, 1026731 -; RV64-2048-NEXT: addiw a2, a2, -1365 -; RV64-2048-NEXT: slli a2, a2, 12 -; RV64-2048-NEXT: addi a2, a2, -1365 -; RV64-2048-NEXT: slli a2, a2, 12 -; RV64-2048-NEXT: addi a2, a2, -1365 -; RV64-2048-NEXT: slli a2, a2, 12 -; RV64-2048-NEXT: addi a2, a2, -1366 +; RV64-2048-NEXT: vrgather.vv v10, v20, v16 ; RV64-2048-NEXT: vsetivli zero, 4, e64, m1, ta, mu -; RV64-2048-NEXT: vmv.s.x v14, a2 +; RV64-2048-NEXT: vmv.s.x v12, a2 ; RV64-2048-NEXT: vsetivli zero, 2, e64, m1, tu, mu -; RV64-2048-NEXT: vmv1r.v v0, v14 -; RV64-2048-NEXT: vslideup.vi v0, v14, 1 +; RV64-2048-NEXT: vmv1r.v v0, v12 +; RV64-2048-NEXT: vslideup.vi v0, v12, 1 ; RV64-2048-NEXT: vsetivli zero, 3, e64, m1, tu, mu -; RV64-2048-NEXT: vslideup.vi v0, v14, 2 +; RV64-2048-NEXT: vslideup.vi v0, v12, 2 ; RV64-2048-NEXT: vsetivli zero, 4, e64, m1, tu, mu -; RV64-2048-NEXT: vslideup.vi v0, v14, 3 +; RV64-2048-NEXT: vslideup.vi v0, v12, 3 ; RV64-2048-NEXT: vsetvli 
zero, a1, e16, m2, ta, mu -; RV64-2048-NEXT: vrgather.vv v12, v8, v10, v0.t -; RV64-2048-NEXT: vse16.v v12, (a0) +; RV64-2048-NEXT: vrgather.vv v10, v8, v18, v0.t +; RV64-2048-NEXT: vse16.v v10, (a0) ; RV64-2048-NEXT: ret entry: %ve = load <128 x i16>, <128 x i16>* %0, align 256 @@ -119,10 +107,11 @@ ; RV64-1024-NEXT: sub sp, sp, a3 ; RV64-1024-NEXT: li a3, 256 ; RV64-1024-NEXT: vsetvli zero, a3, e16, m4, ta, mu -; RV64-1024-NEXT: vle16.v v16, (a1) +; RV64-1024-NEXT: vle16.v v24, (a1) ; RV64-1024-NEXT: vle16.v v8, (a2) ; RV64-1024-NEXT: csrr a1, vlenb -; RV64-1024-NEXT: slli a1, a1, 4 +; RV64-1024-NEXT: li a2, 24 +; RV64-1024-NEXT: mul a1, a1, a2 ; RV64-1024-NEXT: add a1, sp, a1 ; RV64-1024-NEXT: addi a1, a1, 16 ; RV64-1024-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill @@ -130,12 +119,12 @@ ; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; RV64-1024-NEXT: vmv.v.i v8, 0 ; RV64-1024-NEXT: csrr a2, vlenb -; RV64-1024-NEXT: slli a2, a2, 5 +; RV64-1024-NEXT: slli a2, a2, 4 ; RV64-1024-NEXT: add a2, sp, a2 ; RV64-1024-NEXT: addi a2, a2, 16 ; RV64-1024-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV64-1024-NEXT: vsetvli zero, a3, e16, m8, tu, mu -; RV64-1024-NEXT: vslideup.vi v8, v16, 0 +; RV64-1024-NEXT: vslideup.vi v8, v24, 0 ; RV64-1024-NEXT: vsetvli zero, a3, e16, m4, ta, mu ; RV64-1024-NEXT: vmv.v.i v16, 0 ; RV64-1024-NEXT: addi a2, sp, 16 @@ -146,8 +135,7 @@ ; RV64-1024-NEXT: vid.v v24 ; RV64-1024-NEXT: vsrl.vi v16, v24, 1 ; RV64-1024-NEXT: csrr a2, vlenb -; RV64-1024-NEXT: li a4, 24 -; RV64-1024-NEXT: mul a2, a2, a4 +; RV64-1024-NEXT: slli a2, a2, 5 ; RV64-1024-NEXT: add a2, sp, a2 ; RV64-1024-NEXT: addi a2, a2, 16 ; RV64-1024-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill @@ -159,70 +147,54 @@ ; RV64-1024-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill ; RV64-1024-NEXT: vsetvli zero, a3, e16, m8, tu, mu ; RV64-1024-NEXT: csrr a2, vlenb -; RV64-1024-NEXT: slli a2, a2, 5 -; RV64-1024-NEXT: add a2, sp, a2 -; RV64-1024-NEXT: addi a2, a2, 16 -; RV64-1024-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload -; RV64-1024-NEXT: csrr a2, vlenb ; RV64-1024-NEXT: slli a2, a2, 4 ; RV64-1024-NEXT: add a2, sp, a2 ; RV64-1024-NEXT: addi a2, a2, 16 ; RV64-1024-NEXT: vl8re8.v v16, (a2) # Unknown-size Folded Reload -; RV64-1024-NEXT: vslideup.vi v8, v16, 0 -; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, tu, mu -; RV64-1024-NEXT: addi a2, sp, 16 -; RV64-1024-NEXT: vl8re8.v v16, (a2) # Unknown-size Folded Reload -; RV64-1024-NEXT: vslideup.vx v8, v16, a3 ; RV64-1024-NEXT: csrr a2, vlenb -; RV64-1024-NEXT: slli a2, a2, 5 +; RV64-1024-NEXT: li a4, 24 +; RV64-1024-NEXT: mul a2, a2, a4 ; RV64-1024-NEXT: add a2, sp, a2 ; RV64-1024-NEXT: addi a2, a2, 16 -; RV64-1024-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV64-1024-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV64-1024-NEXT: vslideup.vi v16, v8, 0 +; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; RV64-1024-NEXT: addi a2, sp, 16 +; RV64-1024-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload +; RV64-1024-NEXT: vslideup.vx v16, v8, a3 +; RV64-1024-NEXT: lui a2, %hi(.LCPI1_0) +; RV64-1024-NEXT: ld a2, %lo(.LCPI1_0)(a2) ; RV64-1024-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; RV64-1024-NEXT: csrr a2, vlenb -; RV64-1024-NEXT: slli a2, a2, 3 -; RV64-1024-NEXT: add a2, sp, a2 -; RV64-1024-NEXT: addi a2, a2, 16 -; RV64-1024-NEXT: vl8re8.v v16, (a2) # Unknown-size Folded Reload -; RV64-1024-NEXT: vrgather.vv v8, v16, v24 -; RV64-1024-NEXT: lui a2, 1026731 -; RV64-1024-NEXT: addiw a2, a2, -1365 -; RV64-1024-NEXT: slli a2, 
a2, 12 -; RV64-1024-NEXT: addi a2, a2, -1365 -; RV64-1024-NEXT: slli a2, a2, 12 -; RV64-1024-NEXT: addi a2, a2, -1365 -; RV64-1024-NEXT: slli a2, a2, 12 -; RV64-1024-NEXT: addi a2, a2, -1366 +; RV64-1024-NEXT: csrr a3, vlenb +; RV64-1024-NEXT: slli a3, a3, 3 +; RV64-1024-NEXT: add a3, sp, a3 +; RV64-1024-NEXT: addi a3, a3, 16 +; RV64-1024-NEXT: vl8re8.v v0, (a3) # Unknown-size Folded Reload +; RV64-1024-NEXT: vrgather.vv v8, v0, v24 ; RV64-1024-NEXT: vsetivli zero, 8, e64, m1, ta, mu -; RV64-1024-NEXT: vmv.s.x v16, a2 +; RV64-1024-NEXT: vmv.s.x v24, a2 ; RV64-1024-NEXT: vsetivli zero, 2, e64, m1, tu, mu -; RV64-1024-NEXT: vmv1r.v v0, v16 -; RV64-1024-NEXT: vslideup.vi v0, v16, 1 +; RV64-1024-NEXT: vmv1r.v v0, v24 +; RV64-1024-NEXT: vslideup.vi v0, v24, 1 ; RV64-1024-NEXT: vsetivli zero, 3, e64, m1, tu, mu -; RV64-1024-NEXT: vslideup.vi v0, v16, 2 +; RV64-1024-NEXT: vslideup.vi v0, v24, 2 ; RV64-1024-NEXT: vsetivli zero, 4, e64, m1, tu, mu -; RV64-1024-NEXT: vslideup.vi v0, v16, 3 +; RV64-1024-NEXT: vslideup.vi v0, v24, 3 ; RV64-1024-NEXT: vsetivli zero, 5, e64, m1, tu, mu -; RV64-1024-NEXT: vslideup.vi v0, v16, 4 +; RV64-1024-NEXT: vslideup.vi v0, v24, 4 ; RV64-1024-NEXT: vsetivli zero, 6, e64, m1, tu, mu -; RV64-1024-NEXT: vslideup.vi v0, v16, 5 +; RV64-1024-NEXT: vslideup.vi v0, v24, 5 ; RV64-1024-NEXT: vsetivli zero, 7, e64, m1, tu, mu -; RV64-1024-NEXT: vslideup.vi v0, v16, 6 +; RV64-1024-NEXT: vslideup.vi v0, v24, 6 ; RV64-1024-NEXT: vsetivli zero, 8, e64, m1, tu, mu -; RV64-1024-NEXT: vslideup.vi v0, v16, 7 +; RV64-1024-NEXT: vslideup.vi v0, v24, 7 ; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; RV64-1024-NEXT: csrr a1, vlenb -; RV64-1024-NEXT: li a2, 24 -; RV64-1024-NEXT: mul a1, a1, a2 -; RV64-1024-NEXT: add a1, sp, a1 -; RV64-1024-NEXT: addi a1, a1, 16 -; RV64-1024-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; RV64-1024-NEXT: csrr a1, vlenb ; RV64-1024-NEXT: slli a1, a1, 5 ; RV64-1024-NEXT: add a1, sp, a1 ; RV64-1024-NEXT: addi a1, a1, 16 ; RV64-1024-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload -; RV64-1024-NEXT: vrgather.vv v8, v24, v16, v0.t +; RV64-1024-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV64-1024-NEXT: vse16.v v8, (a0) ; RV64-1024-NEXT: csrr a0, vlenb ; RV64-1024-NEXT: li a1, 40 @@ -255,16 +227,10 @@ ; RV64-2048-NEXT: vslideup.vi v8, v16, 0 ; RV64-2048-NEXT: vsetvli zero, a1, e16, m4, tu, mu ; RV64-2048-NEXT: vslideup.vx v8, v24, a3 +; RV64-2048-NEXT: lui a2, %hi(.LCPI1_0) +; RV64-2048-NEXT: ld a2, %lo(.LCPI1_0)(a2) ; RV64-2048-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; RV64-2048-NEXT: vrgather.vv v16, v0, v28 -; RV64-2048-NEXT: lui a2, 1026731 -; RV64-2048-NEXT: addiw a2, a2, -1365 -; RV64-2048-NEXT: slli a2, a2, 12 -; RV64-2048-NEXT: addi a2, a2, -1365 -; RV64-2048-NEXT: slli a2, a2, 12 -; RV64-2048-NEXT: addi a2, a2, -1365 -; RV64-2048-NEXT: slli a2, a2, 12 -; RV64-2048-NEXT: addi a2, a2, -1366 ; RV64-2048-NEXT: vsetivli zero, 8, e64, m1, ta, mu ; RV64-2048-NEXT: vmv.s.x v20, a2 ; RV64-2048-NEXT: vsetivli zero, 2, e64, m1, tu, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll @@ -918,14 +918,8 @@ ; ; RV64-LABEL: vdiv_vi_nxv1i64_0: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1029851 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 +; 
RV64-NEXT: lui a0, %hi(.LCPI58_0) +; RV64-NEXT: ld a0, %lo(.LCPI58_0)(a0) ; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu ; RV64-NEXT: vmulh.vx v8, v8, a0 ; RV64-NEXT: li a0, 63 @@ -998,14 +992,8 @@ ; ; RV64-LABEL: vdiv_vi_nxv2i64_0: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1029851 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 +; RV64-NEXT: lui a0, %hi(.LCPI61_0) +; RV64-NEXT: ld a0, %lo(.LCPI61_0)(a0) ; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu ; RV64-NEXT: vmulh.vx v8, v8, a0 ; RV64-NEXT: li a0, 63 @@ -1078,14 +1066,8 @@ ; ; RV64-LABEL: vdiv_vi_nxv4i64_0: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1029851 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 +; RV64-NEXT: lui a0, %hi(.LCPI64_0) +; RV64-NEXT: ld a0, %lo(.LCPI64_0)(a0) ; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu ; RV64-NEXT: vmulh.vx v8, v8, a0 ; RV64-NEXT: li a0, 63 @@ -1158,14 +1140,8 @@ ; ; RV64-LABEL: vdiv_vi_nxv8i64_0: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1029851 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 +; RV64-NEXT: lui a0, %hi(.LCPI67_0) +; RV64-NEXT: ld a0, %lo(.LCPI67_0)(a0) ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV64-NEXT: vmulh.vx v8, v8, a0 ; RV64-NEXT: li a0, 63 diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll @@ -954,14 +954,8 @@ ; ; RV64-LABEL: vrem_vi_nxv1i64_0: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1029851 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 +; RV64-NEXT: lui a0, %hi(.LCPI56_0) +; RV64-NEXT: ld a0, %lo(.LCPI56_0)(a0) ; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu ; RV64-NEXT: vmulh.vx v9, v8, a0 ; RV64-NEXT: li a0, 63 @@ -1038,14 +1032,8 @@ ; ; RV64-LABEL: vrem_vi_nxv2i64_0: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1029851 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 +; RV64-NEXT: lui a0, %hi(.LCPI59_0) +; RV64-NEXT: ld a0, %lo(.LCPI59_0)(a0) ; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu ; RV64-NEXT: vmulh.vx v10, v8, a0 ; RV64-NEXT: li a0, 63 @@ -1122,14 +1110,8 @@ ; ; RV64-LABEL: vrem_vi_nxv4i64_0: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1029851 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 +; RV64-NEXT: lui a0, %hi(.LCPI62_0) +; RV64-NEXT: ld a0, %lo(.LCPI62_0)(a0) ; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu ; RV64-NEXT: vmulh.vx v12, v8, a0 ; RV64-NEXT: li a0, 63 @@ -1206,14 +1188,8 @@ ; ; RV64-LABEL: vrem_vi_nxv8i64_0: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 1029851 -; RV64-NEXT: addiw a0, a0, 1755 -; RV64-NEXT: slli 
a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, 1755 +; RV64-NEXT: lui a0, %hi(.LCPI65_0) +; RV64-NEXT: ld a0, %lo(.LCPI65_0)(a0) ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV64-NEXT: vmulh.vx v16, v8, a0 ; RV64-NEXT: li a0, 63 diff --git a/llvm/test/CodeGen/RISCV/srem-lkk.ll b/llvm/test/CodeGen/RISCV/srem-lkk.ll --- a/llvm/test/CodeGen/RISCV/srem-lkk.ll +++ b/llvm/test/CodeGen/RISCV/srem-lkk.ll @@ -439,14 +439,8 @@ ; ; RV64IM-LABEL: dont_fold_srem_i64: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, 2675 -; RV64IM-NEXT: addiw a1, a1, -251 -; RV64IM-NEXT: slli a1, a1, 13 -; RV64IM-NEXT: addi a1, a1, 1839 -; RV64IM-NEXT: slli a1, a1, 13 -; RV64IM-NEXT: addi a1, a1, 167 -; RV64IM-NEXT: slli a1, a1, 13 -; RV64IM-NEXT: addi a1, a1, 1505 +; RV64IM-NEXT: lui a1, %hi(.LCPI8_0) +; RV64IM-NEXT: ld a1, %lo(.LCPI8_0)(a1) ; RV64IM-NEXT: mulh a1, a0, a1 ; RV64IM-NEXT: srli a2, a1, 63 ; RV64IM-NEXT: srai a1, a1, 5 diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -162,16 +162,10 @@ ; ; RV64M-LABEL: test_srem_even: ; RV64M: # %bb.0: +; RV64M-NEXT: lui a1, %hi(.LCPI1_0) +; RV64M-NEXT: ld a1, %lo(.LCPI1_0)(a1) ; RV64M-NEXT: slli a0, a0, 60 ; RV64M-NEXT: srai a0, a0, 60 -; RV64M-NEXT: lui a1, 10923 -; RV64M-NEXT: addiw a1, a1, -1365 -; RV64M-NEXT: slli a1, a1, 12 -; RV64M-NEXT: addi a1, a1, -1365 -; RV64M-NEXT: slli a1, a1, 12 -; RV64M-NEXT: addi a1, a1, -1365 -; RV64M-NEXT: slli a1, a1, 12 -; RV64M-NEXT: addi a1, a1, -1365 ; RV64M-NEXT: mulh a1, a0, a1 ; RV64M-NEXT: srli a2, a1, 63 ; RV64M-NEXT: add a1, a1, a2 @@ -200,16 +194,10 @@ ; ; RV64MV-LABEL: test_srem_even: ; RV64MV: # %bb.0: +; RV64MV-NEXT: lui a1, %hi(.LCPI1_0) +; RV64MV-NEXT: ld a1, %lo(.LCPI1_0)(a1) ; RV64MV-NEXT: slli a0, a0, 60 ; RV64MV-NEXT: srai a0, a0, 60 -; RV64MV-NEXT: lui a1, 10923 -; RV64MV-NEXT: addiw a1, a1, -1365 -; RV64MV-NEXT: slli a1, a1, 12 -; RV64MV-NEXT: addi a1, a1, -1365 -; RV64MV-NEXT: slli a1, a1, 12 -; RV64MV-NEXT: addi a1, a1, -1365 -; RV64MV-NEXT: slli a1, a1, 12 -; RV64MV-NEXT: addi a1, a1, -1365 ; RV64MV-NEXT: mulh a1, a0, a1 ; RV64MV-NEXT: srli a2, a1, 63 ; RV64MV-NEXT: add a1, a1, a2 @@ -426,24 +414,12 @@ ; RV64-NEXT: mv a0, s1 ; RV64-NEXT: call __moddi3@plt ; RV64-NEXT: mv s1, a0 -; RV64-NEXT: lui a0, 1026731 -; RV64-NEXT: addiw a0, a0, -1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -1365 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a1, a0, -1365 +; RV64-NEXT: lui a0, %hi(.LCPI3_0) +; RV64-NEXT: ld a1, %lo(.LCPI3_0)(a0) ; RV64-NEXT: mv a0, s2 ; RV64-NEXT: call __muldi3@plt -; RV64-NEXT: lui a1, 10923 -; RV64-NEXT: addiw a1, a1, -1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1365 -; RV64-NEXT: slli a1, a1, 12 -; RV64-NEXT: addi a1, a1, -1366 +; RV64-NEXT: lui a1, %hi(.LCPI3_1) +; RV64-NEXT: ld a1, %lo(.LCPI3_1)(a1) ; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: slli a2, a0, 63 ; RV64-NEXT: srli a0, a0, 1 @@ -567,44 +543,32 @@ ; RV64M-NEXT: lb a1, 12(a0) ; RV64M-NEXT: lwu a2, 8(a0) ; RV64M-NEXT: slli a1, a1, 32 -; RV64M-NEXT: or a2, a2, a1 +; RV64M-NEXT: or a1, a2, a1 ; RV64M-NEXT: li a6, -1 ; RV64M-NEXT: srli a3, a6, 24 -; RV64M-NEXT: and a2, a2, a3 +; RV64M-NEXT: 
and a1, a1, a3 ; RV64M-NEXT: ld a3, 0(a0) -; RV64M-NEXT: slli a4, a2, 29 +; RV64M-NEXT: slli a4, a1, 29 ; RV64M-NEXT: srai a4, a4, 31 -; RV64M-NEXT: slli a2, a2, 31 +; RV64M-NEXT: slli a1, a1, 31 ; RV64M-NEXT: srli a5, a3, 33 -; RV64M-NEXT: or a2, a5, a2 -; RV64M-NEXT: slli a2, a2, 31 -; RV64M-NEXT: srai a2, a2, 31 +; RV64M-NEXT: lui a2, %hi(.LCPI3_0) +; RV64M-NEXT: ld a2, %lo(.LCPI3_0)(a2) +; RV64M-NEXT: or a1, a5, a1 +; RV64M-NEXT: slli a1, a1, 31 +; RV64M-NEXT: srai a1, a1, 31 +; RV64M-NEXT: mulh a2, a1, a2 +; RV64M-NEXT: srli a5, a2, 63 +; RV64M-NEXT: srai a2, a2, 1 +; RV64M-NEXT: add a2, a2, a5 +; RV64M-NEXT: slli a5, a2, 3 +; RV64M-NEXT: sub a2, a2, a5 +; RV64M-NEXT: lui a5, %hi(.LCPI3_1) +; RV64M-NEXT: ld a5, %lo(.LCPI3_1)(a5) ; RV64M-NEXT: slli a3, a3, 31 ; RV64M-NEXT: srai a3, a3, 31 -; RV64M-NEXT: lui a5, 18725 -; RV64M-NEXT: addiw a5, a5, -1755 -; RV64M-NEXT: slli a5, a5, 12 -; RV64M-NEXT: addi a5, a5, -1755 -; RV64M-NEXT: slli a5, a5, 12 -; RV64M-NEXT: addi a5, a5, -1755 -; RV64M-NEXT: slli a5, a5, 12 -; RV64M-NEXT: addi a5, a5, -1755 -; RV64M-NEXT: mulh a5, a2, a5 -; RV64M-NEXT: srli a1, a5, 63 -; RV64M-NEXT: srai a5, a5, 1 -; RV64M-NEXT: add a1, a5, a1 -; RV64M-NEXT: slli a5, a1, 3 -; RV64M-NEXT: sub a1, a1, a5 -; RV64M-NEXT: add a1, a2, a1 -; RV64M-NEXT: lui a2, 1035469 -; RV64M-NEXT: addiw a2, a2, -819 -; RV64M-NEXT: slli a2, a2, 12 -; RV64M-NEXT: addi a2, a2, -819 -; RV64M-NEXT: slli a2, a2, 12 -; RV64M-NEXT: addi a2, a2, -819 -; RV64M-NEXT: slli a2, a2, 13 -; RV64M-NEXT: addi a2, a2, -1639 -; RV64M-NEXT: mulh a2, a4, a2 +; RV64M-NEXT: add a1, a1, a2 +; RV64M-NEXT: mulh a2, a4, a5 ; RV64M-NEXT: srli a5, a2, 63 ; RV64M-NEXT: srai a2, a2, 1 ; RV64M-NEXT: add a2, a2, a5 @@ -613,30 +577,18 @@ ; RV64M-NEXT: add a2, a4, a2 ; RV64M-NEXT: addi a2, a2, -2 ; RV64M-NEXT: snez a2, a2 +; RV64M-NEXT: lui a4, %hi(.LCPI3_2) +; RV64M-NEXT: ld a4, %lo(.LCPI3_2)(a4) +; RV64M-NEXT: lui a5, %hi(.LCPI3_3) +; RV64M-NEXT: ld a5, %lo(.LCPI3_3)(a5) ; RV64M-NEXT: addi a1, a1, -1 ; RV64M-NEXT: snez a1, a1 -; RV64M-NEXT: lui a4, 1026731 -; RV64M-NEXT: addiw a4, a4, -1365 -; RV64M-NEXT: slli a4, a4, 12 -; RV64M-NEXT: addi a4, a4, -1365 -; RV64M-NEXT: slli a4, a4, 12 -; RV64M-NEXT: addi a4, a4, -1365 -; RV64M-NEXT: slli a4, a4, 12 -; RV64M-NEXT: addi a4, a4, -1365 ; RV64M-NEXT: mul a3, a3, a4 -; RV64M-NEXT: lui a4, 10923 -; RV64M-NEXT: addiw a4, a4, -1365 -; RV64M-NEXT: slli a4, a4, 12 -; RV64M-NEXT: addi a4, a4, -1365 -; RV64M-NEXT: slli a4, a4, 12 -; RV64M-NEXT: addi a4, a4, -1365 -; RV64M-NEXT: slli a4, a4, 12 -; RV64M-NEXT: addi a4, a4, -1366 -; RV64M-NEXT: add a3, a3, a4 -; RV64M-NEXT: slli a5, a3, 63 +; RV64M-NEXT: add a3, a3, a5 +; RV64M-NEXT: slli a4, a3, 63 ; RV64M-NEXT: srli a3, a3, 1 -; RV64M-NEXT: or a3, a3, a5 -; RV64M-NEXT: sltu a3, a4, a3 +; RV64M-NEXT: or a3, a3, a4 +; RV64M-NEXT: sltu a3, a5, a3 ; RV64M-NEXT: neg a1, a1 ; RV64M-NEXT: neg a4, a2 ; RV64M-NEXT: neg a3, a3 @@ -771,60 +723,42 @@ ; RV64MV-NEXT: lb a1, 12(a0) ; RV64MV-NEXT: lwu a2, 8(a0) ; RV64MV-NEXT: slli a1, a1, 32 -; RV64MV-NEXT: or a2, a2, a1 +; RV64MV-NEXT: or a1, a2, a1 ; RV64MV-NEXT: li a6, -1 ; RV64MV-NEXT: ld a3, 0(a0) ; RV64MV-NEXT: srli a4, a6, 24 -; RV64MV-NEXT: and a2, a2, a4 -; RV64MV-NEXT: slli a4, a2, 31 +; RV64MV-NEXT: and a1, a1, a4 +; RV64MV-NEXT: slli a4, a1, 31 ; RV64MV-NEXT: srli a5, a3, 33 ; RV64MV-NEXT: or a4, a5, a4 ; RV64MV-NEXT: slli a4, a4, 31 ; RV64MV-NEXT: srai a4, a4, 31 -; RV64MV-NEXT: slli a2, a2, 29 -; RV64MV-NEXT: srai a2, a2, 31 +; RV64MV-NEXT: lui a5, %hi(.LCPI3_0) +; RV64MV-NEXT: ld a5, 
%lo(.LCPI3_0)(a5) +; RV64MV-NEXT: slli a1, a1, 29 ; RV64MV-NEXT: slli a3, a3, 31 ; RV64MV-NEXT: srai a3, a3, 31 -; RV64MV-NEXT: lui a5, 10923 -; RV64MV-NEXT: addiw a5, a5, -1365 -; RV64MV-NEXT: slli a5, a5, 12 -; RV64MV-NEXT: addi a5, a5, -1365 -; RV64MV-NEXT: slli a5, a5, 12 -; RV64MV-NEXT: addi a5, a5, -1365 -; RV64MV-NEXT: slli a5, a5, 12 -; RV64MV-NEXT: addi a5, a5, -1365 ; RV64MV-NEXT: mulh a5, a3, a5 -; RV64MV-NEXT: srli a1, a5, 63 -; RV64MV-NEXT: add a1, a5, a1 +; RV64MV-NEXT: srli a2, a5, 63 +; RV64MV-NEXT: add a2, a5, a2 ; RV64MV-NEXT: li a5, 6 -; RV64MV-NEXT: mul a1, a1, a5 -; RV64MV-NEXT: sub a1, a3, a1 -; RV64MV-NEXT: sd a1, 32(sp) -; RV64MV-NEXT: lui a1, 1035469 -; RV64MV-NEXT: addiw a1, a1, -819 -; RV64MV-NEXT: slli a1, a1, 12 -; RV64MV-NEXT: addi a1, a1, -819 -; RV64MV-NEXT: slli a1, a1, 12 -; RV64MV-NEXT: addi a1, a1, -819 -; RV64MV-NEXT: slli a1, a1, 13 -; RV64MV-NEXT: addi a1, a1, -1639 -; RV64MV-NEXT: mulh a1, a2, a1 -; RV64MV-NEXT: srli a3, a1, 63 -; RV64MV-NEXT: srai a1, a1, 1 -; RV64MV-NEXT: add a1, a1, a3 -; RV64MV-NEXT: slli a3, a1, 2 -; RV64MV-NEXT: add a1, a3, a1 -; RV64MV-NEXT: add a1, a2, a1 +; RV64MV-NEXT: mul a2, a2, a5 +; RV64MV-NEXT: lui a5, %hi(.LCPI3_1) +; RV64MV-NEXT: ld a5, %lo(.LCPI3_1)(a5) +; RV64MV-NEXT: srai a1, a1, 31 +; RV64MV-NEXT: sub a2, a3, a2 +; RV64MV-NEXT: sd a2, 32(sp) +; RV64MV-NEXT: mulh a2, a1, a5 +; RV64MV-NEXT: srli a3, a2, 63 +; RV64MV-NEXT: srai a2, a2, 1 +; RV64MV-NEXT: add a2, a2, a3 +; RV64MV-NEXT: slli a3, a2, 2 +; RV64MV-NEXT: lui a5, %hi(.LCPI3_2) +; RV64MV-NEXT: ld a5, %lo(.LCPI3_2)(a5) +; RV64MV-NEXT: add a2, a3, a2 +; RV64MV-NEXT: add a1, a1, a2 ; RV64MV-NEXT: sd a1, 48(sp) -; RV64MV-NEXT: lui a1, 18725 -; RV64MV-NEXT: addiw a1, a1, -1755 -; RV64MV-NEXT: slli a1, a1, 12 -; RV64MV-NEXT: addi a1, a1, -1755 -; RV64MV-NEXT: slli a1, a1, 12 -; RV64MV-NEXT: addi a1, a1, -1755 -; RV64MV-NEXT: slli a1, a1, 12 -; RV64MV-NEXT: addi a1, a1, -1755 -; RV64MV-NEXT: mulh a1, a4, a1 +; RV64MV-NEXT: mulh a1, a4, a5 ; RV64MV-NEXT: srli a2, a1, 63 ; RV64MV-NEXT: srai a1, a1, 1 ; RV64MV-NEXT: add a1, a1, a2 @@ -835,8 +769,8 @@ ; RV64MV-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV64MV-NEXT: addi a1, sp, 32 ; RV64MV-NEXT: vle64.v v8, (a1) -; RV64MV-NEXT: lui a1, %hi(.LCPI3_0) -; RV64MV-NEXT: addi a1, a1, %lo(.LCPI3_0) +; RV64MV-NEXT: lui a1, %hi(.LCPI3_3) +; RV64MV-NEXT: addi a1, a1, %lo(.LCPI3_3) ; RV64MV-NEXT: vle64.v v10, (a1) ; RV64MV-NEXT: srli a1, a6, 31 ; RV64MV-NEXT: vand.vx v8, v8, a1 diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll --- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll @@ -149,64 +149,41 @@ ; ; RV64IM-LABEL: fold_srem_vec_1: ; RV64IM: # %bb.0: +; RV64IM-NEXT: lh a2, 0(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI0_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI0_0)(a3) ; RV64IM-NEXT: lh a6, 24(a1) -; RV64IM-NEXT: lh a3, 16(a1) -; RV64IM-NEXT: lh a4, 8(a1) -; RV64IM-NEXT: lh a1, 0(a1) -; RV64IM-NEXT: lui a5, 1045903 -; RV64IM-NEXT: addiw a5, a5, -733 -; RV64IM-NEXT: slli a5, a5, 15 -; RV64IM-NEXT: addi a5, a5, 1035 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, -905 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, -1767 -; RV64IM-NEXT: mulh a5, a1, a5 -; RV64IM-NEXT: add a5, a5, a1 -; RV64IM-NEXT: srli a2, a5, 63 -; RV64IM-NEXT: srli a5, a5, 6 -; RV64IM-NEXT: addw a2, a5, a2 +; RV64IM-NEXT: lh a7, 16(a1) +; RV64IM-NEXT: lh a1, 8(a1) +; RV64IM-NEXT: mulh a3, a2, a3 +; RV64IM-NEXT: add a3, a3, a2 +; RV64IM-NEXT: srli a4, 
a3, 63 +; RV64IM-NEXT: srli a3, a3, 6 +; RV64IM-NEXT: addw a3, a3, a4 +; RV64IM-NEXT: lui a4, %hi(.LCPI0_1) +; RV64IM-NEXT: ld a4, %lo(.LCPI0_1)(a4) ; RV64IM-NEXT: li a5, 95 -; RV64IM-NEXT: mulw a2, a2, a5 -; RV64IM-NEXT: subw a1, a1, a2 -; RV64IM-NEXT: lui a2, 777976 -; RV64IM-NEXT: addiw a2, a2, -1057 -; RV64IM-NEXT: slli a2, a2, 15 -; RV64IM-NEXT: addi a2, a2, -1057 -; RV64IM-NEXT: slli a2, a2, 14 -; RV64IM-NEXT: addi a2, a2, -529 -; RV64IM-NEXT: srli a2, a2, 1 -; RV64IM-NEXT: mulh a2, a4, a2 -; RV64IM-NEXT: sub a2, a2, a4 -; RV64IM-NEXT: srli a5, a2, 63 -; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: addw a2, a2, a5 +; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: subw a2, a2, a3 +; RV64IM-NEXT: mulh a3, a1, a4 +; RV64IM-NEXT: sub a3, a3, a1 +; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a3, a3, 6 +; RV64IM-NEXT: addw a3, a3, a4 +; RV64IM-NEXT: lui a4, %hi(.LCPI0_2) +; RV64IM-NEXT: ld a4, %lo(.LCPI0_2)(a4) ; RV64IM-NEXT: li a5, -124 -; RV64IM-NEXT: mulw a2, a2, a5 -; RV64IM-NEXT: subw a2, a4, a2 -; RV64IM-NEXT: lui a4, 2675 -; RV64IM-NEXT: addiw a4, a4, -251 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1839 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 167 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1505 -; RV64IM-NEXT: mulh a4, a3, a4 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srli a4, a4, 5 -; RV64IM-NEXT: addw a4, a4, a5 +; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: subw a1, a1, a3 +; RV64IM-NEXT: mulh a3, a7, a4 +; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a3, a3, 5 +; RV64IM-NEXT: addw a3, a3, a4 +; RV64IM-NEXT: lui a4, %hi(.LCPI0_3) +; RV64IM-NEXT: ld a4, %lo(.LCPI0_3)(a4) ; RV64IM-NEXT: li a5, 98 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a3, a3, a4 -; RV64IM-NEXT: lui a4, 1040212 -; RV64IM-NEXT: addiw a4, a4, 1977 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, -1907 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, -453 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, -1213 +; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: subw a3, a7, a3 ; RV64IM-NEXT: mulh a4, a6, a4 ; RV64IM-NEXT: srli a5, a4, 63 ; RV64IM-NEXT: srli a4, a4, 7 @@ -216,8 +193,8 @@ ; RV64IM-NEXT: subw a4, a6, a4 ; RV64IM-NEXT: sh a4, 6(a0) ; RV64IM-NEXT: sh a3, 4(a0) -; RV64IM-NEXT: sh a2, 2(a0) -; RV64IM-NEXT: sh a1, 0(a0) +; RV64IM-NEXT: sh a1, 2(a0) +; RV64IM-NEXT: sh a2, 0(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -357,46 +334,40 @@ ; ; RV64IM-LABEL: fold_srem_vec_2: ; RV64IM: # %bb.0: +; RV64IM-NEXT: lh a2, 0(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI1_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI1_0)(a3) ; RV64IM-NEXT: lh a6, 24(a1) -; RV64IM-NEXT: lh a7, 16(a1) -; RV64IM-NEXT: lh a4, 8(a1) -; RV64IM-NEXT: lh a1, 0(a1) -; RV64IM-NEXT: lui a5, 1045903 -; RV64IM-NEXT: addiw a5, a5, -733 -; RV64IM-NEXT: slli a5, a5, 15 -; RV64IM-NEXT: addi a5, a5, 1035 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, -905 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, -1767 -; RV64IM-NEXT: mulh a2, a1, a5 -; RV64IM-NEXT: add a2, a2, a1 -; RV64IM-NEXT: srli a3, a2, 63 -; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: addw a2, a2, a3 -; RV64IM-NEXT: li a3, 95 -; RV64IM-NEXT: mulw a2, a2, a3 -; RV64IM-NEXT: subw t0, a1, a2 -; RV64IM-NEXT: mulh a2, a4, a5 -; RV64IM-NEXT: add a2, a2, a4 -; RV64IM-NEXT: srli a1, a2, 63 -; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: addw a1, a2, a1 -; RV64IM-NEXT: mulw a1, a1, a3 -; RV64IM-NEXT: subw a1, a4, a1 -; RV64IM-NEXT: mulh a2, a7, a5 -; 
RV64IM-NEXT: add a2, a2, a7 +; RV64IM-NEXT: lh a5, 16(a1) +; RV64IM-NEXT: lh a1, 8(a1) +; RV64IM-NEXT: mulh a4, a2, a3 +; RV64IM-NEXT: add a4, a4, a2 +; RV64IM-NEXT: srli a7, a4, 63 +; RV64IM-NEXT: srli a4, a4, 6 +; RV64IM-NEXT: addw a4, a4, a7 +; RV64IM-NEXT: li a7, 95 +; RV64IM-NEXT: mulw a4, a4, a7 +; RV64IM-NEXT: subw t0, a2, a4 +; RV64IM-NEXT: mulh a4, a1, a3 +; RV64IM-NEXT: add a4, a4, a1 +; RV64IM-NEXT: srli a2, a4, 63 +; RV64IM-NEXT: srli a4, a4, 6 +; RV64IM-NEXT: addw a2, a4, a2 +; RV64IM-NEXT: mulw a2, a2, a7 +; RV64IM-NEXT: subw a1, a1, a2 +; RV64IM-NEXT: mulh a2, a5, a3 +; RV64IM-NEXT: add a2, a2, a5 ; RV64IM-NEXT: srli a4, a2, 63 ; RV64IM-NEXT: srli a2, a2, 6 ; RV64IM-NEXT: addw a2, a2, a4 -; RV64IM-NEXT: mulw a2, a2, a3 -; RV64IM-NEXT: subw a2, a7, a2 -; RV64IM-NEXT: mulh a4, a6, a5 -; RV64IM-NEXT: add a4, a4, a6 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srli a4, a4, 6 -; RV64IM-NEXT: addw a4, a4, a5 -; RV64IM-NEXT: mulw a3, a4, a3 +; RV64IM-NEXT: mulw a2, a2, a7 +; RV64IM-NEXT: subw a2, a5, a2 +; RV64IM-NEXT: mulh a3, a6, a3 +; RV64IM-NEXT: add a3, a3, a6 +; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a3, a3, 6 +; RV64IM-NEXT: addw a3, a3, a4 +; RV64IM-NEXT: mulw a3, a3, a7 ; RV64IM-NEXT: subw a3, a6, a3 ; RV64IM-NEXT: sh a3, 6(a0) ; RV64IM-NEXT: sh a2, 4(a0) @@ -603,55 +574,49 @@ ; ; RV64IM-LABEL: combine_srem_sdiv: ; RV64IM: # %bb.0: +; RV64IM-NEXT: lh a2, 24(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI2_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI2_0)(a3) ; RV64IM-NEXT: lh a6, 0(a1) -; RV64IM-NEXT: lh a7, 8(a1) -; RV64IM-NEXT: lh a4, 16(a1) -; RV64IM-NEXT: lh a1, 24(a1) -; RV64IM-NEXT: lui a5, 1045903 -; RV64IM-NEXT: addiw a5, a5, -733 -; RV64IM-NEXT: slli a5, a5, 15 -; RV64IM-NEXT: addi a5, a5, 1035 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, -905 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, -1767 -; RV64IM-NEXT: mulh a2, a1, a5 -; RV64IM-NEXT: add a2, a2, a1 -; RV64IM-NEXT: srli a3, a2, 63 -; RV64IM-NEXT: srai a2, a2, 6 -; RV64IM-NEXT: addw t3, a2, a3 -; RV64IM-NEXT: li t0, 95 -; RV64IM-NEXT: mulw a3, t3, t0 -; RV64IM-NEXT: subw t1, a1, a3 -; RV64IM-NEXT: mulh a3, a4, a5 -; RV64IM-NEXT: add a3, a3, a4 -; RV64IM-NEXT: srli a1, a3, 63 -; RV64IM-NEXT: srai a3, a3, 6 -; RV64IM-NEXT: addw a1, a3, a1 -; RV64IM-NEXT: mulw a3, a1, t0 -; RV64IM-NEXT: subw t2, a4, a3 -; RV64IM-NEXT: mulh a4, a7, a5 -; RV64IM-NEXT: add a4, a4, a7 -; RV64IM-NEXT: srli a3, a4, 63 +; RV64IM-NEXT: lh a5, 8(a1) +; RV64IM-NEXT: lh a1, 16(a1) +; RV64IM-NEXT: mulh a4, a2, a3 +; RV64IM-NEXT: add a4, a4, a2 +; RV64IM-NEXT: srli a7, a4, 63 +; RV64IM-NEXT: srai a4, a4, 6 +; RV64IM-NEXT: addw t0, a4, a7 +; RV64IM-NEXT: li a7, 95 +; RV64IM-NEXT: mulw a4, t0, a7 +; RV64IM-NEXT: subw t1, a2, a4 +; RV64IM-NEXT: mulh a4, a1, a3 +; RV64IM-NEXT: add a4, a4, a1 +; RV64IM-NEXT: srli a2, a4, 63 ; RV64IM-NEXT: srai a4, a4, 6 -; RV64IM-NEXT: addw a3, a4, a3 -; RV64IM-NEXT: mulw a4, a3, t0 -; RV64IM-NEXT: subw a4, a7, a4 -; RV64IM-NEXT: mulh a5, a6, a5 -; RV64IM-NEXT: add a5, a5, a6 -; RV64IM-NEXT: srli a2, a5, 63 -; RV64IM-NEXT: srai a5, a5, 6 -; RV64IM-NEXT: addw a2, a5, a2 -; RV64IM-NEXT: mulw a5, a2, t0 +; RV64IM-NEXT: addw a2, a4, a2 +; RV64IM-NEXT: mulw a4, a2, a7 +; RV64IM-NEXT: subw t2, a1, a4 +; RV64IM-NEXT: mulh a4, a5, a3 +; RV64IM-NEXT: add a4, a4, a5 +; RV64IM-NEXT: srli a1, a4, 63 +; RV64IM-NEXT: srai a4, a4, 6 +; RV64IM-NEXT: addw a1, a4, a1 +; RV64IM-NEXT: mulw a4, a1, a7 +; RV64IM-NEXT: subw a4, a5, a4 +; RV64IM-NEXT: mulh a3, a6, a3 +; RV64IM-NEXT: add a3, a3, a6 +; 
RV64IM-NEXT: srli a5, a3, 63 +; RV64IM-NEXT: srai a3, a3, 6 +; RV64IM-NEXT: addw a3, a3, a5 +; RV64IM-NEXT: mulw a5, a3, a7 ; RV64IM-NEXT: subw a5, a6, a5 -; RV64IM-NEXT: addw a2, a5, a2 -; RV64IM-NEXT: addw a3, a4, a3 -; RV64IM-NEXT: addw a1, t2, a1 -; RV64IM-NEXT: addw a4, t1, t3 +; RV64IM-NEXT: addw a3, a5, a3 +; RV64IM-NEXT: addw a1, a4, a1 +; RV64IM-NEXT: addw a2, t2, a2 +; RV64IM-NEXT: addw a4, t1, t0 ; RV64IM-NEXT: sh a4, 6(a0) -; RV64IM-NEXT: sh a1, 4(a0) -; RV64IM-NEXT: sh a3, 2(a0) -; RV64IM-NEXT: sh a2, 0(a0) +; RV64IM-NEXT: sh a2, 4(a0) +; RV64IM-NEXT: sh a1, 2(a0) +; RV64IM-NEXT: sh a3, 0(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, %2 = sdiv <4 x i16> %x, @@ -775,42 +740,36 @@ ; ; RV64IM-LABEL: dont_fold_srem_power_of_two: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lh a6, 16(a1) -; RV64IM-NEXT: lh a3, 8(a1) -; RV64IM-NEXT: lh a4, 0(a1) -; RV64IM-NEXT: lh a1, 24(a1) -; RV64IM-NEXT: lui a5, 1045903 -; RV64IM-NEXT: addiw a5, a5, -733 -; RV64IM-NEXT: slli a5, a5, 15 -; RV64IM-NEXT: addi a5, a5, 1035 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, -905 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, -1767 -; RV64IM-NEXT: mulh a5, a1, a5 -; RV64IM-NEXT: add a5, a5, a1 -; RV64IM-NEXT: srli a2, a5, 63 -; RV64IM-NEXT: srli a5, a5, 6 -; RV64IM-NEXT: addw a2, a5, a2 -; RV64IM-NEXT: li a5, 95 -; RV64IM-NEXT: mulw a2, a2, a5 -; RV64IM-NEXT: subw a1, a1, a2 -; RV64IM-NEXT: srli a2, a4, 58 -; RV64IM-NEXT: add a2, a4, a2 -; RV64IM-NEXT: andi a2, a2, -64 -; RV64IM-NEXT: subw a2, a4, a2 -; RV64IM-NEXT: srli a4, a3, 59 -; RV64IM-NEXT: add a4, a3, a4 -; RV64IM-NEXT: andi a4, a4, -32 -; RV64IM-NEXT: subw a3, a3, a4 -; RV64IM-NEXT: srli a4, a6, 61 -; RV64IM-NEXT: add a4, a6, a4 -; RV64IM-NEXT: andi a4, a4, -8 -; RV64IM-NEXT: subw a4, a6, a4 +; RV64IM-NEXT: lh a2, 24(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI3_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI3_0)(a3) +; RV64IM-NEXT: lh a4, 16(a1) +; RV64IM-NEXT: lh a5, 8(a1) +; RV64IM-NEXT: lh a1, 0(a1) +; RV64IM-NEXT: mulh a3, a2, a3 +; RV64IM-NEXT: add a3, a3, a2 +; RV64IM-NEXT: srli a6, a3, 63 +; RV64IM-NEXT: srli a3, a3, 6 +; RV64IM-NEXT: addw a6, a3, a6 +; RV64IM-NEXT: li a3, 95 +; RV64IM-NEXT: mulw a3, a6, a3 +; RV64IM-NEXT: subw a2, a2, a3 +; RV64IM-NEXT: srli a3, a1, 58 +; RV64IM-NEXT: add a3, a1, a3 +; RV64IM-NEXT: andi a3, a3, -64 +; RV64IM-NEXT: subw a1, a1, a3 +; RV64IM-NEXT: srli a3, a5, 59 +; RV64IM-NEXT: add a3, a5, a3 +; RV64IM-NEXT: andi a3, a3, -32 +; RV64IM-NEXT: subw a3, a5, a3 +; RV64IM-NEXT: srli a5, a4, 61 +; RV64IM-NEXT: add a5, a4, a5 +; RV64IM-NEXT: andi a5, a5, -8 +; RV64IM-NEXT: subw a4, a4, a5 ; RV64IM-NEXT: sh a4, 4(a0) ; RV64IM-NEXT: sh a3, 2(a0) -; RV64IM-NEXT: sh a2, 0(a0) -; RV64IM-NEXT: sh a1, 6(a0) +; RV64IM-NEXT: sh a1, 0(a0) +; RV64IM-NEXT: sh a2, 6(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -933,60 +892,42 @@ ; ; RV64IM-LABEL: dont_fold_srem_one: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lh a2, 24(a1) -; RV64IM-NEXT: lh a3, 8(a1) -; RV64IM-NEXT: lh a1, 16(a1) -; RV64IM-NEXT: lui a4, 1043590 -; RV64IM-NEXT: addiw a4, a4, -1781 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1069 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, -1959 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 357 -; RV64IM-NEXT: mulh a4, a1, a4 -; RV64IM-NEXT: add a4, a4, a1 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srli a4, a4, 4 -; RV64IM-NEXT: addw a4, a4, a5 -; RV64IM-NEXT: li a5, 23 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a1, a1, a4 -; RV64IM-NEXT: lui a4, 6413 
-; RV64IM-NEXT: addiw a4, a4, 1265 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1027 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1077 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 965 -; RV64IM-NEXT: mulh a4, a3, a4 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srli a4, a4, 8 -; RV64IM-NEXT: addw a4, a4, a5 +; RV64IM-NEXT: lh a2, 16(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI4_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI4_0)(a3) +; RV64IM-NEXT: lh a6, 24(a1) +; RV64IM-NEXT: lh a1, 8(a1) +; RV64IM-NEXT: mulh a3, a2, a3 +; RV64IM-NEXT: add a3, a3, a2 +; RV64IM-NEXT: srli a5, a3, 63 +; RV64IM-NEXT: srli a3, a3, 4 +; RV64IM-NEXT: addw a3, a3, a5 +; RV64IM-NEXT: lui a5, %hi(.LCPI4_1) +; RV64IM-NEXT: ld a5, %lo(.LCPI4_1)(a5) +; RV64IM-NEXT: li a4, 23 +; RV64IM-NEXT: mulw a3, a3, a4 +; RV64IM-NEXT: subw a2, a2, a3 +; RV64IM-NEXT: mulh a3, a1, a5 +; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a3, a3, 8 +; RV64IM-NEXT: addw a3, a3, a4 +; RV64IM-NEXT: lui a4, %hi(.LCPI4_2) +; RV64IM-NEXT: ld a4, %lo(.LCPI4_2)(a4) ; RV64IM-NEXT: li a5, 654 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a3, a3, a4 -; RV64IM-NEXT: lui a4, 12375 -; RV64IM-NEXT: addiw a4, a4, -575 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 883 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, -431 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 1959 -; RV64IM-NEXT: mulh a4, a2, a4 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srli a4, a4, 11 -; RV64IM-NEXT: addw a4, a4, a5 -; RV64IM-NEXT: lui a5, 1 -; RV64IM-NEXT: addiw a5, a5, 1327 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a2, a2, a4 +; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: subw a1, a1, a3 +; RV64IM-NEXT: mulh a3, a6, a4 +; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a3, a3, 11 +; RV64IM-NEXT: addw a3, a3, a4 +; RV64IM-NEXT: lui a4, 1 +; RV64IM-NEXT: addiw a4, a4, 1327 +; RV64IM-NEXT: mulw a3, a3, a4 +; RV64IM-NEXT: subw a3, a6, a3 ; RV64IM-NEXT: sh zero, 0(a0) -; RV64IM-NEXT: sh a2, 6(a0) -; RV64IM-NEXT: sh a3, 2(a0) -; RV64IM-NEXT: sh a1, 4(a0) +; RV64IM-NEXT: sh a3, 6(a0) +; RV64IM-NEXT: sh a1, 2(a0) +; RV64IM-NEXT: sh a2, 4(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -1104,50 +1045,38 @@ ; ; RV64IM-LABEL: dont_fold_urem_i16_smax: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lh a2, 8(a1) -; RV64IM-NEXT: lh a3, 24(a1) -; RV64IM-NEXT: lh a1, 16(a1) -; RV64IM-NEXT: lui a4, 1043590 -; RV64IM-NEXT: addiw a4, a4, -1781 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1069 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, -1959 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 357 -; RV64IM-NEXT: mulh a4, a1, a4 -; RV64IM-NEXT: add a4, a4, a1 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srli a4, a4, 4 -; RV64IM-NEXT: addw a4, a4, a5 -; RV64IM-NEXT: li a5, 23 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a1, a1, a4 -; RV64IM-NEXT: lui a4, 12375 -; RV64IM-NEXT: addiw a4, a4, -575 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 883 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, -431 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 1959 -; RV64IM-NEXT: mulh a4, a3, a4 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srli a4, a4, 11 -; RV64IM-NEXT: addw a4, a4, a5 +; RV64IM-NEXT: lh a2, 16(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI5_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI5_0)(a3) +; RV64IM-NEXT: lh a4, 24(a1) +; RV64IM-NEXT: mulh a3, a2, a3 +; RV64IM-NEXT: add a3, a3, a2 +; RV64IM-NEXT: srli a5, 
a3, 63 +; RV64IM-NEXT: srli a3, a3, 4 +; RV64IM-NEXT: addw a3, a3, a5 +; RV64IM-NEXT: li a6, 23 +; RV64IM-NEXT: lui a5, %hi(.LCPI5_1) +; RV64IM-NEXT: ld a5, %lo(.LCPI5_1)(a5) +; RV64IM-NEXT: mulw a3, a3, a6 +; RV64IM-NEXT: lh a1, 8(a1) +; RV64IM-NEXT: subw a2, a2, a3 +; RV64IM-NEXT: mulh a3, a4, a5 +; RV64IM-NEXT: srli a5, a3, 63 +; RV64IM-NEXT: srli a3, a3, 11 +; RV64IM-NEXT: addw a3, a3, a5 ; RV64IM-NEXT: lui a5, 1 ; RV64IM-NEXT: addiw a5, a5, 1327 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a3, a3, a4 -; RV64IM-NEXT: srli a4, a2, 49 -; RV64IM-NEXT: add a4, a2, a4 +; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: subw a3, a4, a3 +; RV64IM-NEXT: srli a4, a1, 49 +; RV64IM-NEXT: add a4, a1, a4 ; RV64IM-NEXT: lui a5, 8 ; RV64IM-NEXT: and a4, a4, a5 -; RV64IM-NEXT: subw a2, a2, a4 +; RV64IM-NEXT: subw a1, a1, a4 ; RV64IM-NEXT: sh zero, 0(a0) -; RV64IM-NEXT: sh a2, 2(a0) +; RV64IM-NEXT: sh a1, 2(a0) ; RV64IM-NEXT: sh a3, 6(a0) -; RV64IM-NEXT: sh a1, 4(a0) +; RV64IM-NEXT: sh a2, 4(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -1335,60 +1264,42 @@ ; ; RV64IM-LABEL: dont_fold_srem_i64: ; RV64IM: # %bb.0: -; RV64IM-NEXT: ld a2, 24(a1) -; RV64IM-NEXT: ld a3, 8(a1) -; RV64IM-NEXT: ld a1, 16(a1) -; RV64IM-NEXT: lui a4, 1043590 -; RV64IM-NEXT: addiw a4, a4, -1781 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1069 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, -1959 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 357 -; RV64IM-NEXT: mulh a4, a1, a4 -; RV64IM-NEXT: add a4, a4, a1 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srai a4, a4, 4 -; RV64IM-NEXT: add a4, a4, a5 -; RV64IM-NEXT: li a5, 23 -; RV64IM-NEXT: mul a4, a4, a5 -; RV64IM-NEXT: sub a1, a1, a4 -; RV64IM-NEXT: lui a4, 6413 -; RV64IM-NEXT: addiw a4, a4, 1265 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1027 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, 1077 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 965 -; RV64IM-NEXT: mulh a4, a3, a4 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srai a4, a4, 8 -; RV64IM-NEXT: add a4, a4, a5 +; RV64IM-NEXT: ld a2, 16(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI6_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI6_0)(a3) +; RV64IM-NEXT: ld a6, 24(a1) +; RV64IM-NEXT: ld a1, 8(a1) +; RV64IM-NEXT: mulh a3, a2, a3 +; RV64IM-NEXT: add a3, a3, a2 +; RV64IM-NEXT: srli a5, a3, 63 +; RV64IM-NEXT: srai a3, a3, 4 +; RV64IM-NEXT: add a3, a3, a5 +; RV64IM-NEXT: lui a5, %hi(.LCPI6_1) +; RV64IM-NEXT: ld a5, %lo(.LCPI6_1)(a5) +; RV64IM-NEXT: li a4, 23 +; RV64IM-NEXT: mul a3, a3, a4 +; RV64IM-NEXT: sub a2, a2, a3 +; RV64IM-NEXT: mulh a3, a1, a5 +; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srai a3, a3, 8 +; RV64IM-NEXT: add a3, a3, a4 +; RV64IM-NEXT: lui a4, %hi(.LCPI6_2) +; RV64IM-NEXT: ld a4, %lo(.LCPI6_2)(a4) ; RV64IM-NEXT: li a5, 654 -; RV64IM-NEXT: mul a4, a4, a5 -; RV64IM-NEXT: sub a3, a3, a4 -; RV64IM-NEXT: lui a4, 12375 -; RV64IM-NEXT: addiw a4, a4, -575 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 883 -; RV64IM-NEXT: slli a4, a4, 13 -; RV64IM-NEXT: addi a4, a4, -431 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 1959 -; RV64IM-NEXT: mulh a4, a2, a4 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srai a4, a4, 11 -; RV64IM-NEXT: add a4, a4, a5 -; RV64IM-NEXT: lui a5, 1 -; RV64IM-NEXT: addiw a5, a5, 1327 -; RV64IM-NEXT: mul a4, a4, a5 -; RV64IM-NEXT: sub a2, a2, a4 +; RV64IM-NEXT: mul a3, a3, a5 +; RV64IM-NEXT: sub a1, a1, a3 +; RV64IM-NEXT: mulh a3, a6, a4 +; RV64IM-NEXT: srli a4, a3, 63 
+; RV64IM-NEXT: srai a3, a3, 11 +; RV64IM-NEXT: add a3, a3, a4 +; RV64IM-NEXT: lui a4, 1 +; RV64IM-NEXT: addiw a4, a4, 1327 +; RV64IM-NEXT: mul a3, a3, a4 +; RV64IM-NEXT: sub a3, a6, a3 ; RV64IM-NEXT: sd zero, 0(a0) -; RV64IM-NEXT: sd a2, 24(a0) -; RV64IM-NEXT: sd a3, 8(a0) -; RV64IM-NEXT: sd a1, 16(a0) +; RV64IM-NEXT: sd a3, 24(a0) +; RV64IM-NEXT: sd a1, 8(a0) +; RV64IM-NEXT: sd a2, 16(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i64> %x, ret <4 x i64> %1 diff --git a/llvm/test/CodeGen/RISCV/urem-lkk.ll b/llvm/test/CodeGen/RISCV/urem-lkk.ll --- a/llvm/test/CodeGen/RISCV/urem-lkk.ll +++ b/llvm/test/CodeGen/RISCV/urem-lkk.ll @@ -264,16 +264,10 @@ ; ; RV64IM-LABEL: dont_fold_urem_i64: ; RV64IM: # %bb.0: -; RV64IM-NEXT: srli a1, a0, 1 -; RV64IM-NEXT: lui a2, 2675 -; RV64IM-NEXT: addiw a2, a2, -251 -; RV64IM-NEXT: slli a2, a2, 13 -; RV64IM-NEXT: addi a2, a2, 1839 -; RV64IM-NEXT: slli a2, a2, 13 -; RV64IM-NEXT: addi a2, a2, 167 -; RV64IM-NEXT: slli a2, a2, 13 -; RV64IM-NEXT: addi a2, a2, 1505 -; RV64IM-NEXT: mulhu a1, a1, a2 +; RV64IM-NEXT: lui a1, %hi(.LCPI6_0) +; RV64IM-NEXT: ld a1, %lo(.LCPI6_0)(a1) +; RV64IM-NEXT: srli a2, a0, 1 +; RV64IM-NEXT: mulhu a1, a2, a1 ; RV64IM-NEXT: srli a1, a1, 4 ; RV64IM-NEXT: li a2, 98 ; RV64IM-NEXT: mul a1, a1, a2 diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll --- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll @@ -144,71 +144,47 @@ ; ; RV64IM-LABEL: fold_urem_vec_1: ; RV64IM: # %bb.0: +; RV64IM-NEXT: lhu a2, 0(a1) +; RV64IM-NEXT: lui a3, %hi(.LCPI0_0) +; RV64IM-NEXT: ld a3, %lo(.LCPI0_0)(a3) ; RV64IM-NEXT: lhu a6, 24(a1) -; RV64IM-NEXT: lhu a3, 16(a1) -; RV64IM-NEXT: lhu a4, 8(a1) -; RV64IM-NEXT: lhu a1, 0(a1) -; RV64IM-NEXT: lui a5, 1423 -; RV64IM-NEXT: addiw a5, a5, -733 -; RV64IM-NEXT: slli a5, a5, 15 -; RV64IM-NEXT: addi a5, a5, 1035 -; RV64IM-NEXT: slli a5, a5, 13 -; RV64IM-NEXT: addi a5, a5, -1811 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, 561 -; RV64IM-NEXT: mulhu a5, a1, a5 -; RV64IM-NEXT: sub a2, a1, a5 -; RV64IM-NEXT: srli a2, a2, 1 -; RV64IM-NEXT: add a2, a2, a5 -; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: li a5, 95 -; RV64IM-NEXT: mulw a2, a2, a5 -; RV64IM-NEXT: subw a1, a1, a2 -; RV64IM-NEXT: srli a2, a4, 2 -; RV64IM-NEXT: lui a5, 264 -; RV64IM-NEXT: addiw a5, a5, 1057 -; RV64IM-NEXT: slli a5, a5, 15 -; RV64IM-NEXT: addi a5, a5, 1057 -; RV64IM-NEXT: slli a5, a5, 15 -; RV64IM-NEXT: addi a5, a5, 1057 -; RV64IM-NEXT: slli a5, a5, 12 -; RV64IM-NEXT: addi a5, a5, 133 -; RV64IM-NEXT: mulhu a2, a2, a5 -; RV64IM-NEXT: srli a2, a2, 3 -; RV64IM-NEXT: li a5, 124 -; RV64IM-NEXT: mulw a2, a2, a5 -; RV64IM-NEXT: subw a2, a4, a2 -; RV64IM-NEXT: srli a4, a3, 1 -; RV64IM-NEXT: lui a5, 2675 -; RV64IM-NEXT: addiw a5, a5, -251 -; RV64IM-NEXT: slli a5, a5, 13 -; RV64IM-NEXT: addi a5, a5, 1839 -; RV64IM-NEXT: slli a5, a5, 13 -; RV64IM-NEXT: addi a5, a5, 167 -; RV64IM-NEXT: slli a5, a5, 13 -; RV64IM-NEXT: addi a5, a5, 1505 -; RV64IM-NEXT: mulhu a4, a4, a5 -; RV64IM-NEXT: srli a4, a4, 4 -; RV64IM-NEXT: li a5, 98 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a3, a3, a4 -; RV64IM-NEXT: lui a4, 8364 -; RV64IM-NEXT: addiw a4, a4, -1977 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 1907 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 453 -; RV64IM-NEXT: slli a4, a4, 12 -; RV64IM-NEXT: addi a4, a4, 1213 -; RV64IM-NEXT: mulhu a4, a6, a4 -; RV64IM-NEXT: srli a4, a4, 7 -; RV64IM-NEXT: li a5, 1003 -; RV64IM-NEXT: mulw a4, a4, a5 -; 
RV64IM-NEXT: subw a4, a6, a4
-; RV64IM-NEXT: sh a4, 6(a0)
-; RV64IM-NEXT: sh a3, 4(a0)
-; RV64IM-NEXT: sh a2, 2(a0)
-; RV64IM-NEXT: sh a1, 0(a0)
+; RV64IM-NEXT: lhu a5, 16(a1)
+; RV64IM-NEXT: lhu a1, 8(a1)
+; RV64IM-NEXT: mulhu a3, a2, a3
+; RV64IM-NEXT: sub a4, a2, a3
+; RV64IM-NEXT: srli a4, a4, 1
+; RV64IM-NEXT: add a3, a4, a3
+; RV64IM-NEXT: srli a3, a3, 6
+; RV64IM-NEXT: li a7, 95
+; RV64IM-NEXT: lui a4, %hi(.LCPI0_1)
+; RV64IM-NEXT: ld a4, %lo(.LCPI0_1)(a4)
+; RV64IM-NEXT: mulw a3, a3, a7
+; RV64IM-NEXT: subw t0, a2, a3
+; RV64IM-NEXT: srli a3, a1, 2
+; RV64IM-NEXT: mulhu a3, a3, a4
+; RV64IM-NEXT: srli a3, a3, 3
+; RV64IM-NEXT: li a7, 124
+; RV64IM-NEXT: lui a4, %hi(.LCPI0_2)
+; RV64IM-NEXT: ld a4, %lo(.LCPI0_2)(a4)
+; RV64IM-NEXT: mulw a3, a3, a7
+; RV64IM-NEXT: subw a1, a1, a3
+; RV64IM-NEXT: srli a3, a5, 1
+; RV64IM-NEXT: mulhu a3, a3, a4
+; RV64IM-NEXT: srli a3, a3, 4
+; RV64IM-NEXT: lui a4, %hi(.LCPI0_3)
+; RV64IM-NEXT: ld a4, %lo(.LCPI0_3)(a4)
+; RV64IM-NEXT: li a2, 98
+; RV64IM-NEXT: mulw a2, a3, a2
+; RV64IM-NEXT: subw a2, a5, a2
+; RV64IM-NEXT: mulhu a3, a6, a4
+; RV64IM-NEXT: srli a3, a3, 7
+; RV64IM-NEXT: li a4, 1003
+; RV64IM-NEXT: mulw a3, a3, a4
+; RV64IM-NEXT: subw a3, a6, a3
+; RV64IM-NEXT: sh a3, 6(a0)
+; RV64IM-NEXT: sh a2, 4(a0)
+; RV64IM-NEXT: sh a1, 2(a0)
+; RV64IM-NEXT: sh t0, 0(a0)
 ; RV64IM-NEXT: ret
 %1 = urem <4 x i16> %x,
 ret <4 x i16> %1
@@ -348,46 +324,40 @@
 ;
 ; RV64IM-LABEL: fold_urem_vec_2:
 ; RV64IM: # %bb.0:
+; RV64IM-NEXT: lhu a2, 0(a1)
+; RV64IM-NEXT: lui a3, %hi(.LCPI1_0)
+; RV64IM-NEXT: ld a3, %lo(.LCPI1_0)(a3)
 ; RV64IM-NEXT: lhu a6, 24(a1)
 ; RV64IM-NEXT: lhu a7, 16(a1)
-; RV64IM-NEXT: lhu a4, 8(a1)
-; RV64IM-NEXT: lhu a1, 0(a1)
-; RV64IM-NEXT: lui a5, 1423
-; RV64IM-NEXT: addiw a5, a5, -733
-; RV64IM-NEXT: slli a5, a5, 15
-; RV64IM-NEXT: addi a5, a5, 1035
-; RV64IM-NEXT: slli a5, a5, 13
-; RV64IM-NEXT: addi a5, a5, -1811
-; RV64IM-NEXT: slli a5, a5, 12
-; RV64IM-NEXT: addi a5, a5, 561
-; RV64IM-NEXT: mulhu a2, a1, a5
-; RV64IM-NEXT: sub a3, a1, a2
-; RV64IM-NEXT: srli a3, a3, 1
-; RV64IM-NEXT: add a2, a3, a2
+; RV64IM-NEXT: lhu a1, 8(a1)
+; RV64IM-NEXT: mulhu a4, a2, a3
+; RV64IM-NEXT: sub a5, a2, a4
+; RV64IM-NEXT: srli a5, a5, 1
+; RV64IM-NEXT: add a4, a5, a4
+; RV64IM-NEXT: srli a4, a4, 6
+; RV64IM-NEXT: li a5, 95
+; RV64IM-NEXT: mulw a4, a4, a5
+; RV64IM-NEXT: subw t0, a2, a4
+; RV64IM-NEXT: mulhu a4, a1, a3
+; RV64IM-NEXT: sub a2, a1, a4
+; RV64IM-NEXT: srli a2, a2, 1
+; RV64IM-NEXT: add a2, a2, a4
 ; RV64IM-NEXT: srli a2, a2, 6
-; RV64IM-NEXT: li a3, 95
-; RV64IM-NEXT: mulw a2, a2, a3
-; RV64IM-NEXT: subw t0, a1, a2
-; RV64IM-NEXT: mulhu a2, a4, a5
-; RV64IM-NEXT: sub a1, a4, a2
-; RV64IM-NEXT: srli a1, a1, 1
-; RV64IM-NEXT: add a1, a1, a2
-; RV64IM-NEXT: srli a1, a1, 6
-; RV64IM-NEXT: mulw a1, a1, a3
-; RV64IM-NEXT: subw a1, a4, a1
-; RV64IM-NEXT: mulhu a2, a7, a5
+; RV64IM-NEXT: mulw a2, a2, a5
+; RV64IM-NEXT: subw a1, a1, a2
+; RV64IM-NEXT: mulhu a2, a7, a3
 ; RV64IM-NEXT: sub a4, a7, a2
 ; RV64IM-NEXT: srli a4, a4, 1
 ; RV64IM-NEXT: add a2, a4, a2
 ; RV64IM-NEXT: srli a2, a2, 6
-; RV64IM-NEXT: mulw a2, a2, a3
+; RV64IM-NEXT: mulw a2, a2, a5
 ; RV64IM-NEXT: subw a2, a7, a2
-; RV64IM-NEXT: mulhu a4, a6, a5
-; RV64IM-NEXT: sub a5, a6, a4
-; RV64IM-NEXT: srli a5, a5, 1
-; RV64IM-NEXT: add a4, a5, a4
-; RV64IM-NEXT: srli a4, a4, 6
-; RV64IM-NEXT: mulw a3, a4, a3
+; RV64IM-NEXT: mulhu a3, a6, a3
+; RV64IM-NEXT: sub a4, a6, a3
+; RV64IM-NEXT: srli a4, a4, 1
+; RV64IM-NEXT: add a3, a4, a3
+; RV64IM-NEXT: srli a3, a3, 6
+; RV64IM-NEXT: mulw a3, a3, a5
 ; RV64IM-NEXT: subw a3, a6, a3
 ; RV64IM-NEXT: sh a3, 6(a0)
 ; RV64IM-NEXT: sh a2, 4(a0)
@@ -594,55 +564,49 @@
 ;
 ; RV64IM-LABEL: combine_urem_udiv:
 ; RV64IM: # %bb.0:
+; RV64IM-NEXT: lhu a2, 24(a1)
+; RV64IM-NEXT: lui a3, %hi(.LCPI2_0)
+; RV64IM-NEXT: ld a3, %lo(.LCPI2_0)(a3)
 ; RV64IM-NEXT: lhu a6, 0(a1)
 ; RV64IM-NEXT: lhu a7, 8(a1)
-; RV64IM-NEXT: lhu a4, 16(a1)
-; RV64IM-NEXT: lhu a1, 24(a1)
-; RV64IM-NEXT: lui a5, 1423
-; RV64IM-NEXT: addiw a5, a5, -733
-; RV64IM-NEXT: slli a5, a5, 15
-; RV64IM-NEXT: addi a5, a5, 1035
-; RV64IM-NEXT: slli a5, a5, 13
-; RV64IM-NEXT: addi a5, a5, -1811
-; RV64IM-NEXT: slli a5, a5, 12
-; RV64IM-NEXT: addi a5, a5, 561
-; RV64IM-NEXT: mulhu a2, a1, a5
-; RV64IM-NEXT: sub a3, a1, a2
-; RV64IM-NEXT: srli a3, a3, 1
-; RV64IM-NEXT: add a2, a3, a2
-; RV64IM-NEXT: srli t3, a2, 6
+; RV64IM-NEXT: lhu a1, 16(a1)
+; RV64IM-NEXT: mulhu a4, a2, a3
+; RV64IM-NEXT: sub a5, a2, a4
+; RV64IM-NEXT: srli a5, a5, 1
+; RV64IM-NEXT: add a4, a5, a4
+; RV64IM-NEXT: srli t3, a4, 6
 ; RV64IM-NEXT: li t0, 95
-; RV64IM-NEXT: mulw a3, t3, t0
-; RV64IM-NEXT: subw t1, a1, a3
-; RV64IM-NEXT: mulhu a3, a4, a5
-; RV64IM-NEXT: sub a1, a4, a3
-; RV64IM-NEXT: srli a1, a1, 1
-; RV64IM-NEXT: add a1, a1, a3
-; RV64IM-NEXT: srli a1, a1, 6
-; RV64IM-NEXT: mulw a3, a1, t0
-; RV64IM-NEXT: subw t2, a4, a3
-; RV64IM-NEXT: mulhu a4, a7, a5
-; RV64IM-NEXT: sub a3, a7, a4
-; RV64IM-NEXT: srli a3, a3, 1
-; RV64IM-NEXT: add a3, a3, a4
-; RV64IM-NEXT: srli a3, a3, 6
-; RV64IM-NEXT: mulw a4, a3, t0
-; RV64IM-NEXT: subw a4, a7, a4
-; RV64IM-NEXT: mulhu a5, a6, a5
-; RV64IM-NEXT: sub a2, a6, a5
+; RV64IM-NEXT: mulw a5, t3, t0
+; RV64IM-NEXT: subw t1, a2, a5
+; RV64IM-NEXT: mulhu a5, a1, a3
+; RV64IM-NEXT: sub a2, a1, a5
 ; RV64IM-NEXT: srli a2, a2, 1
 ; RV64IM-NEXT: add a2, a2, a5
 ; RV64IM-NEXT: srli a2, a2, 6
 ; RV64IM-NEXT: mulw a5, a2, t0
-; RV64IM-NEXT: subw a5, a6, a5
-; RV64IM-NEXT: addw a2, a5, a2
+; RV64IM-NEXT: subw t2, a1, a5
+; RV64IM-NEXT: mulhu a5, a7, a3
+; RV64IM-NEXT: sub a1, a7, a5
+; RV64IM-NEXT: srli a1, a1, 1
+; RV64IM-NEXT: add a1, a1, a5
+; RV64IM-NEXT: srli a1, a1, 6
+; RV64IM-NEXT: mulw a5, a1, t0
+; RV64IM-NEXT: subw a5, a7, a5
+; RV64IM-NEXT: mulhu a3, a6, a3
+; RV64IM-NEXT: sub a4, a6, a3
+; RV64IM-NEXT: srli a4, a4, 1
+; RV64IM-NEXT: add a3, a4, a3
+; RV64IM-NEXT: srli a3, a3, 6
+; RV64IM-NEXT: mulw a4, a3, t0
+; RV64IM-NEXT: subw a4, a6, a4
 ; RV64IM-NEXT: addw a3, a4, a3
-; RV64IM-NEXT: addw a1, t2, a1
+; RV64IM-NEXT: addw a1, a5, a1
+; RV64IM-NEXT: addw a2, t2, a2
 ; RV64IM-NEXT: addw a4, t1, t3
 ; RV64IM-NEXT: sh a4, 6(a0)
-; RV64IM-NEXT: sh a1, 4(a0)
-; RV64IM-NEXT: sh a3, 2(a0)
-; RV64IM-NEXT: sh a2, 0(a0)
+; RV64IM-NEXT: sh a2, 4(a0)
+; RV64IM-NEXT: sh a1, 2(a0)
+; RV64IM-NEXT: sh a3, 0(a0)
 ; RV64IM-NEXT: ret
 %1 = urem <4 x i16> %x,
 %2 = udiv <4 x i16> %x,
@@ -741,33 +705,27 @@
 ;
 ; RV64IM-LABEL: dont_fold_urem_power_of_two:
 ; RV64IM: # %bb.0:
+; RV64IM-NEXT: lhu a2, 24(a1)
+; RV64IM-NEXT: lui a3, %hi(.LCPI3_0)
+; RV64IM-NEXT: ld a3, %lo(.LCPI3_0)(a3)
 ; RV64IM-NEXT: lhu a6, 16(a1)
-; RV64IM-NEXT: lhu a3, 8(a1)
-; RV64IM-NEXT: lhu a4, 0(a1)
-; RV64IM-NEXT: lhu a1, 24(a1)
-; RV64IM-NEXT: lui a5, 1423
-; RV64IM-NEXT: addiw a5, a5, -733
-; RV64IM-NEXT: slli a5, a5, 15
-; RV64IM-NEXT: addi a5, a5, 1035
-; RV64IM-NEXT: slli a5, a5, 13
-; RV64IM-NEXT: addi a5, a5, -1811
-; RV64IM-NEXT: slli a5, a5, 12
-; RV64IM-NEXT: addi a5, a5, 561
-; RV64IM-NEXT: mulhu a5, a1, a5
-; RV64IM-NEXT: sub a2, a1, a5
-; RV64IM-NEXT: srli a2, a2, 1
-; RV64IM-NEXT: add a2, a2, a5
-; RV64IM-NEXT: srli a2, a2, 6
-; RV64IM-NEXT: li a5, 95
-; RV64IM-NEXT: mulw a2, a2, a5
-; RV64IM-NEXT: subw a1, a1, a2
-; RV64IM-NEXT: andi a2, a4, 63
-; RV64IM-NEXT: andi a3, a3, 31
+; RV64IM-NEXT: lhu a5, 8(a1)
+; RV64IM-NEXT: lhu a1, 0(a1)
+; RV64IM-NEXT: mulhu a3, a2, a3
+; RV64IM-NEXT: sub a4, a2, a3
+; RV64IM-NEXT: srli a4, a4, 1
+; RV64IM-NEXT: add a3, a4, a3
+; RV64IM-NEXT: srli a3, a3, 6
+; RV64IM-NEXT: li a4, 95
+; RV64IM-NEXT: mulw a3, a3, a4
+; RV64IM-NEXT: subw a2, a2, a3
+; RV64IM-NEXT: andi a1, a1, 63
+; RV64IM-NEXT: andi a3, a5, 31
 ; RV64IM-NEXT: andi a4, a6, 7
 ; RV64IM-NEXT: sh a4, 4(a0)
 ; RV64IM-NEXT: sh a3, 2(a0)
-; RV64IM-NEXT: sh a2, 0(a0)
-; RV64IM-NEXT: sh a1, 6(a0)
+; RV64IM-NEXT: sh a1, 0(a0)
+; RV64IM-NEXT: sh a2, 6(a0)
 ; RV64IM-NEXT: ret
 %1 = urem <4 x i16> %x,
 ret <4 x i16> %1
@@ -883,57 +841,39 @@
 ;
 ; RV64IM-LABEL: dont_fold_urem_one:
 ; RV64IM: # %bb.0:
-; RV64IM-NEXT: lhu a2, 24(a1)
-; RV64IM-NEXT: lhu a3, 8(a1)
-; RV64IM-NEXT: lhu a1, 16(a1)
-; RV64IM-NEXT: lui a4, 3206
-; RV64IM-NEXT: addiw a4, a4, -1781
-; RV64IM-NEXT: slli a4, a4, 13
-; RV64IM-NEXT: addi a4, a4, 1069
-; RV64IM-NEXT: slli a4, a4, 12
-; RV64IM-NEXT: addi a4, a4, -1959
-; RV64IM-NEXT: slli a4, a4, 14
-; RV64IM-NEXT: addi a4, a4, 713
-; RV64IM-NEXT: mulhu a4, a1, a4
-; RV64IM-NEXT: sub a5, a1, a4
+; RV64IM-NEXT: lhu a2, 16(a1)
+; RV64IM-NEXT: lui a3, %hi(.LCPI4_0)
+; RV64IM-NEXT: ld a3, %lo(.LCPI4_0)(a3)
+; RV64IM-NEXT: lhu a4, 24(a1)
+; RV64IM-NEXT: lhu a1, 8(a1)
+; RV64IM-NEXT: mulhu a3, a2, a3
+; RV64IM-NEXT: sub a5, a2, a3
 ; RV64IM-NEXT: srli a5, a5, 1
-; RV64IM-NEXT: add a4, a5, a4
-; RV64IM-NEXT: srli a4, a4, 4
-; RV64IM-NEXT: li a5, 23
-; RV64IM-NEXT: mulw a4, a4, a5
-; RV64IM-NEXT: subw a1, a1, a4
-; RV64IM-NEXT: srli a4, a3, 1
-; RV64IM-NEXT: lui a5, 6413
-; RV64IM-NEXT: addiw a5, a5, 1265
-; RV64IM-NEXT: slli a5, a5, 13
-; RV64IM-NEXT: addi a5, a5, 1027
-; RV64IM-NEXT: slli a5, a5, 13
-; RV64IM-NEXT: addi a5, a5, 1077
-; RV64IM-NEXT: slli a5, a5, 12
-; RV64IM-NEXT: addi a5, a5, 965
-; RV64IM-NEXT: mulhu a4, a4, a5
-; RV64IM-NEXT: srli a4, a4, 7
-; RV64IM-NEXT: li a5, 654
-; RV64IM-NEXT: mulw a4, a4, a5
-; RV64IM-NEXT: subw a3, a3, a4
-; RV64IM-NEXT: lui a4, 1044567
-; RV64IM-NEXT: addiw a4, a4, -575
-; RV64IM-NEXT: slli a4, a4, 12
-; RV64IM-NEXT: addi a4, a4, 883
-; RV64IM-NEXT: slli a4, a4, 14
-; RV64IM-NEXT: addi a4, a4, -861
-; RV64IM-NEXT: slli a4, a4, 12
-; RV64IM-NEXT: addi a4, a4, -179
-; RV64IM-NEXT: mulhu a4, a2, a4
-; RV64IM-NEXT: srli a4, a4, 12
-; RV64IM-NEXT: lui a5, 1
-; RV64IM-NEXT: addiw a5, a5, 1327
-; RV64IM-NEXT: mulw a4, a4, a5
-; RV64IM-NEXT: subw a2, a2, a4
+; RV64IM-NEXT: add a3, a5, a3
+; RV64IM-NEXT: srli a3, a3, 4
+; RV64IM-NEXT: li a6, 23
+; RV64IM-NEXT: lui a5, %hi(.LCPI4_1)
+; RV64IM-NEXT: ld a5, %lo(.LCPI4_1)(a5)
+; RV64IM-NEXT: mulw a3, a3, a6
+; RV64IM-NEXT: subw a6, a2, a3
+; RV64IM-NEXT: srli a3, a1, 1
+; RV64IM-NEXT: mulhu a3, a3, a5
+; RV64IM-NEXT: srli a3, a3, 7
+; RV64IM-NEXT: lui a5, %hi(.LCPI4_2)
+; RV64IM-NEXT: ld a5, %lo(.LCPI4_2)(a5)
+; RV64IM-NEXT: li a2, 654
+; RV64IM-NEXT: mulw a2, a3, a2
+; RV64IM-NEXT: subw a1, a1, a2
+; RV64IM-NEXT: mulhu a2, a4, a5
+; RV64IM-NEXT: srli a2, a2, 12
+; RV64IM-NEXT: lui a3, 1
+; RV64IM-NEXT: addiw a3, a3, 1327
+; RV64IM-NEXT: mulw a2, a2, a3
+; RV64IM-NEXT: subw a2, a4, a2
 ; RV64IM-NEXT: sh zero, 0(a0)
 ; RV64IM-NEXT: sh a2, 6(a0)
-; RV64IM-NEXT: sh a3, 2(a0)
-; RV64IM-NEXT: sh a1, 4(a0)
+; RV64IM-NEXT: sh a1, 2(a0)
+; RV64IM-NEXT: sh a6, 4(a0)
 ; RV64IM-NEXT: ret
 %1 = urem <4 x i16> %x,
 ret <4 x i16> %1
@@ -1130,57 +1070,39 @@
 ;
 ; RV64IM-LABEL: dont_fold_urem_i64:
 ; RV64IM: # %bb.0:
-; RV64IM-NEXT: ld a2, 24(a1)
-; RV64IM-NEXT: ld a3, 8(a1)
-; RV64IM-NEXT: ld a1, 16(a1)
-; RV64IM-NEXT: lui a4, 3206
-; RV64IM-NEXT: addiw a4, a4, -1781
-; RV64IM-NEXT: slli a4, a4, 13
-; RV64IM-NEXT: addi a4, a4, 1069
-; RV64IM-NEXT: slli a4, a4, 12
-; RV64IM-NEXT: addi a4, a4, -1959
-; RV64IM-NEXT: slli a4, a4, 14
-; RV64IM-NEXT: addi a4, a4, 713
-; RV64IM-NEXT: mulhu a4, a1, a4
-; RV64IM-NEXT: sub a5, a1, a4
+; RV64IM-NEXT: ld a2, 16(a1)
+; RV64IM-NEXT: lui a3, %hi(.LCPI6_0)
+; RV64IM-NEXT: ld a3, %lo(.LCPI6_0)(a3)
+; RV64IM-NEXT: ld a4, 24(a1)
+; RV64IM-NEXT: ld a1, 8(a1)
+; RV64IM-NEXT: mulhu a3, a2, a3
+; RV64IM-NEXT: sub a5, a2, a3
 ; RV64IM-NEXT: srli a5, a5, 1
-; RV64IM-NEXT: add a4, a5, a4
-; RV64IM-NEXT: srli a4, a4, 4
-; RV64IM-NEXT: li a5, 23
-; RV64IM-NEXT: mul a4, a4, a5
-; RV64IM-NEXT: sub a1, a1, a4
-; RV64IM-NEXT: srli a4, a3, 1
-; RV64IM-NEXT: lui a5, 6413
-; RV64IM-NEXT: addiw a5, a5, 1265
-; RV64IM-NEXT: slli a5, a5, 13
-; RV64IM-NEXT: addi a5, a5, 1027
-; RV64IM-NEXT: slli a5, a5, 13
-; RV64IM-NEXT: addi a5, a5, 1077
-; RV64IM-NEXT: slli a5, a5, 12
-; RV64IM-NEXT: addi a5, a5, 965
-; RV64IM-NEXT: mulhu a4, a4, a5
-; RV64IM-NEXT: srli a4, a4, 7
-; RV64IM-NEXT: li a5, 654
-; RV64IM-NEXT: mul a4, a4, a5
-; RV64IM-NEXT: sub a3, a3, a4
-; RV64IM-NEXT: lui a4, 1044567
-; RV64IM-NEXT: addiw a4, a4, -575
-; RV64IM-NEXT: slli a4, a4, 12
-; RV64IM-NEXT: addi a4, a4, 883
-; RV64IM-NEXT: slli a4, a4, 14
-; RV64IM-NEXT: addi a4, a4, -861
-; RV64IM-NEXT: slli a4, a4, 12
-; RV64IM-NEXT: addi a4, a4, -179
-; RV64IM-NEXT: mulhu a4, a2, a4
-; RV64IM-NEXT: srli a4, a4, 12
-; RV64IM-NEXT: lui a5, 1
-; RV64IM-NEXT: addiw a5, a5, 1327
-; RV64IM-NEXT: mul a4, a4, a5
-; RV64IM-NEXT: sub a2, a2, a4
+; RV64IM-NEXT: add a3, a5, a3
+; RV64IM-NEXT: srli a3, a3, 4
+; RV64IM-NEXT: li a6, 23
+; RV64IM-NEXT: lui a5, %hi(.LCPI6_1)
+; RV64IM-NEXT: ld a5, %lo(.LCPI6_1)(a5)
+; RV64IM-NEXT: mul a3, a3, a6
+; RV64IM-NEXT: sub a6, a2, a3
+; RV64IM-NEXT: srli a3, a1, 1
+; RV64IM-NEXT: mulhu a3, a3, a5
+; RV64IM-NEXT: srli a3, a3, 7
+; RV64IM-NEXT: lui a5, %hi(.LCPI6_2)
+; RV64IM-NEXT: ld a5, %lo(.LCPI6_2)(a5)
+; RV64IM-NEXT: li a2, 654
+; RV64IM-NEXT: mul a2, a3, a2
+; RV64IM-NEXT: sub a1, a1, a2
+; RV64IM-NEXT: mulhu a2, a4, a5
+; RV64IM-NEXT: srli a2, a2, 12
+; RV64IM-NEXT: lui a3, 1
+; RV64IM-NEXT: addiw a3, a3, 1327
+; RV64IM-NEXT: mul a2, a2, a3
+; RV64IM-NEXT: sub a2, a4, a2
 ; RV64IM-NEXT: sd zero, 0(a0)
 ; RV64IM-NEXT: sd a2, 24(a0)
-; RV64IM-NEXT: sd a3, 8(a0)
-; RV64IM-NEXT: sd a1, 16(a0)
+; RV64IM-NEXT: sd a1, 8(a0)
+; RV64IM-NEXT: sd a6, 16(a0)
 ; RV64IM-NEXT: ret
 %1 = urem <4 x i64> %x,
 ret <4 x i64> %1
diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll
--- a/llvm/test/CodeGen/RISCV/vararg.ll
+++ b/llvm/test/CodeGen/RISCV/vararg.ll
@@ -1534,34 +1534,16 @@
 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 16(sp)
 ; LP64-LP64F-LP64D-FPELIM-NEXT: li a0, 15
 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 2049
-; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, -1147
-; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 13
-; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 983
-; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 14
-; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 655
-; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 12
-; LP64-LP64F-LP64D-FPELIM-NEXT: addi t0, a0, 1475
+; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, %hi(.LCPI11_0)
+; LP64-LP64F-LP64D-FPELIM-NEXT: ld t0, %lo(.LCPI11_0)(a0)
+; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, %hi(.LCPI11_1)
+; LP64-LP64F-LP64D-FPELIM-NEXT: ld a2, %lo(.LCPI11_1)(a0)
+; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, %hi(.LCPI11_2)
+; LP64-LP64F-LP64D-FPELIM-NEXT: ld a3, %lo(.LCPI11_2)(a0)
 ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 1192
 ; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, 381
 ; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 12
 ; LP64-LP64F-LP64D-FPELIM-NEXT: addi a6, a0, -2048
-; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 1048248
-; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, 1311
-; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 12
-; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, -1147
-; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 13
-; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 983
-; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 15
-; LP64-LP64F-LP64D-FPELIM-NEXT: addi a2, a0, 1311
-; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 512
-; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, 73
-; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 15
-; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, -1311
-; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 12
-; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 1147
-; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a0, 14
-; LP64-LP64F-LP64D-FPELIM-NEXT: addi a3, a0, -1967
 ; LP64-LP64F-LP64D-FPELIM-NEXT: li a0, 1
 ; LP64-LP64F-LP64D-FPELIM-NEXT: li a1, 11
 ; LP64-LP64F-LP64D-FPELIM-NEXT: li a4, 12
@@ -1585,34 +1567,16 @@
 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 16(sp)
 ; LP64-LP64F-LP64D-WITHFP-NEXT: li a0, 15
 ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 8(sp)
-; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, 2049
-; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a0, a0, -1147
-; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 13
-; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 983
-; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 14
-; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 655
-; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 12
-; LP64-LP64F-LP64D-WITHFP-NEXT: addi t0, a0, 1475
+; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, %hi(.LCPI11_0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: ld t0, %lo(.LCPI11_0)(a0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, %hi(.LCPI11_1)
+; LP64-LP64F-LP64D-WITHFP-NEXT: ld a2, %lo(.LCPI11_1)(a0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, %hi(.LCPI11_2)
+; LP64-LP64F-LP64D-WITHFP-NEXT: ld a3, %lo(.LCPI11_2)(a0)
 ; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, 1192
 ; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a0, a0, 381
 ; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 12
 ; LP64-LP64F-LP64D-WITHFP-NEXT: addi a6, a0, -2048
-; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, 1048248
-; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a0, a0, 1311
-; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 12
-; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, -1147
-; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 13
-; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 983
-; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 15
-; LP64-LP64F-LP64D-WITHFP-NEXT: addi a2, a0, 1311
-; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, 512
-; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a0, a0, 73
-; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 15
-; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, -1311
-; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 12
-; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 1147
-; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a0, 14
-; LP64-LP64F-LP64D-WITHFP-NEXT: addi a3, a0, -1967
 ; LP64-LP64F-LP64D-WITHFP-NEXT: li a0, 1
 ; LP64-LP64F-LP64D-WITHFP-NEXT: li a1, 11
 ; LP64-LP64F-LP64D-WITHFP-NEXT: li a4, 12