diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -2210,13 +2210,19 @@
 void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value,
                                  MCStreamer &Out) {
-  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Value, isRV64());
+  RISCVMatInt::InstSeq Seq =
+      RISCVMatInt::generateInstSeq(Value, getSTI().getFeatureBits());
 
   MCRegister SrcReg = RISCV::X0;
   for (RISCVMatInt::Inst &Inst : Seq) {
     if (Inst.Opc == RISCV::LUI) {
       emitToStreamer(
           Out, MCInstBuilder(RISCV::LUI).addReg(DestReg).addImm(Inst.Imm));
+    } else if (Inst.Opc == RISCV::ADDUW) {
+      emitToStreamer(Out, MCInstBuilder(RISCV::ADDUW)
+                              .addReg(DestReg)
+                              .addReg(SrcReg)
+                              .addReg(RISCV::X0));
     } else {
       emitToStreamer(
           Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addImm(
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
@@ -10,10 +10,12 @@
 #define LLVM_LIB_TARGET_RISCV_MATINT_H
 
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/SubtargetFeature.h"
 #include <cstdint>
 
 namespace llvm {
 class APInt;
+class MCSubtargetInfo;
 
 namespace RISCVMatInt {
 struct Inst {
@@ -29,15 +31,16 @@
 // simple struct is produced rather than directly emitting the instructions in
 // order to allow this helper to be used from both the MC layer and during
 // instruction selection.
-InstSeq generateInstSeq(int64_t Val, bool IsRV64);
+InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures);
 
 // Helper to estimate the number of instructions required to materialise the
 // given immediate value into a register. This estimate does not account for
 // `Val` possibly fitting into an immediate, and so may over-estimate.
 //
 // This will attempt to produce instructions to materialise `Val` as an
-// `Size`-bit immediate. `IsRV64` should match the target architecture.
-int getIntMatCost(const APInt &Val, unsigned Size, bool IsRV64);
+// `Size`-bit immediate.
+int getIntMatCost(const APInt &Val, unsigned Size,
+                  const FeatureBitset &ActiveFeatures);
 } // namespace RISCVMatInt
 } // namespace llvm

 #endif
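The interface change above replaces the lone IsRV64 flag with the subtarget's full FeatureBitset, so RISCVMatInt can key decisions off any extension (Zba below) without further signature churn. A minimal sketch of how a caller drives the new signature; countInsns() is a hypothetical wrapper for illustration, not part of this patch:

#include "MCTargetDesc/RISCVMatInt.h"
#include "llvm/MC/MCSubtargetInfo.h"

// Hypothetical helper: how many instructions would materializing Val take?
static int countInsns(int64_t Val, const llvm::MCSubtargetInfo &STI) {
  // The whole feature set rides along, so the materializer can consult
  // RISCV::Feature64Bit, RISCV::FeatureExtZba, etc. internally.
  llvm::RISCVMatInt::InstSeq Seq =
      llvm::RISCVMatInt::generateInstSeq(Val, STI.getFeatureBits());
  return static_cast<int>(Seq.size());
}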
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -13,8 +13,11 @@
 using namespace llvm;
 
 // Recursively generate a sequence for materializing an integer.
-static void generateInstSeqImpl(int64_t Val, bool IsRV64,
+static void generateInstSeqImpl(int64_t Val,
+                                const FeatureBitset &ActiveFeatures,
                                 RISCVMatInt::InstSeq &Res) {
+  bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
+
   if (isInt<32>(Val)) {
     // Depending on the active bits in the immediate Value v, the following
     // instruction sequences are emitted:
@@ -66,7 +69,7 @@
   int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
   Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
 
-  generateInstSeqImpl(Hi52, IsRV64, Res);
+  generateInstSeqImpl(Hi52, ActiveFeatures, Res);
 
   Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
   if (Lo12)
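For orientation before the next hunk: when Val does not fit in 32 bits, generateInstSeqImpl peels off a sign-extended low 12 bits for a trailing ADDI, rounds the remainder into Hi52, and recurses; the context lines above then fold Hi52's trailing zeros into the SLLI amount. A standalone sketch of that split (my paraphrase, not LLVM's code), with 0x123450001 as an arbitrary example value:

#include <cstdint>
#include <cstdio>

int main() {
  int64_t Val = 0x123450001LL;
  // Sign-extend the low 12 bits (the future ADDI immediate).
  int64_t Lo12 = (int64_t)((uint64_t)Val << 52) >> 52;
  // Round the rest so that (Hi52 << 12) + Lo12 reassembles Val exactly.
  int64_t Hi52 = (int64_t)(((uint64_t)Val + 0x800) >> 12);
  printf("Hi52=0x%llx Lo12=%lld ok=%d\n", (unsigned long long)Hi52,
         (long long)Lo12, (Hi52 << 12) + Lo12 == Val);
}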
@@ -75,44 +78,73 @@
 namespace llvm {
 namespace RISCVMatInt {
-InstSeq generateInstSeq(int64_t Val, bool IsRV64) {
+InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
   RISCVMatInt::InstSeq Res;
-  generateInstSeqImpl(Val, IsRV64, Res);
+  generateInstSeqImpl(Val, ActiveFeatures, Res);
 
   // If the constant is positive we might be able to generate a shifted constant
   // with no leading zeros and use a final SRLI to restore them.
   if (Val > 0 && Res.size() > 2) {
-    assert(IsRV64 && "Expected RV32 to only need 2 instructions");
-    unsigned ShiftAmount = countLeadingZeros((uint64_t)Val);
-    Val <<= ShiftAmount;
+    assert(ActiveFeatures[RISCV::Feature64Bit] &&
+           "Expected RV32 to only need 2 instructions");
+    unsigned LeadingZeros = countLeadingZeros((uint64_t)Val);
+    uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
     // Fill in the bits that will be shifted out with 1s. An example where this
     // helps is trailing one masks with 32 or more ones. This will generate
     // ADDI -1 and an SRLI.
-    Val |= maskTrailingOnes<uint64_t>(ShiftAmount);
+    ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
 
     RISCVMatInt::InstSeq TmpSeq;
-    generateInstSeqImpl(Val, IsRV64, TmpSeq);
-    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, ShiftAmount));
+    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));
 
     // Keep the new sequence if it is an improvement.
-    if (TmpSeq.size() < Res.size())
+    if (TmpSeq.size() < Res.size()) {
       Res = TmpSeq;
+      // A 2 instruction sequence is the best we can do.
+      if (Res.size() <= 2)
+        return Res;
+    }
 
     // Some cases can benefit from filling the lower bits with zeros instead.
-    Val &= maskTrailingZeros<uint64_t>(ShiftAmount);
+    ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
     TmpSeq.clear();
-    generateInstSeqImpl(Val, IsRV64, TmpSeq);
-    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, ShiftAmount));
+    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));
 
     // Keep the new sequence if it is an improvement.
-    if (TmpSeq.size() < Res.size())
+    if (TmpSeq.size() < Res.size()) {
       Res = TmpSeq;
+      // A 2 instruction sequence is the best we can do.
+      if (Res.size() <= 2)
+        return Res;
+    }
+
+    // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
+    // the end of the sequence.
+    if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureExtZba]) {
+      // Try replacing upper bits with 1.
+      uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
+      TmpSeq.clear();
+      generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
+      TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDUW, 0));
+
+      // Keep the new sequence if it is an improvement.
+      if (TmpSeq.size() < Res.size()) {
+        Res = TmpSeq;
+        // A 2 instruction sequence is the best we can do.
+        if (Res.size() <= 2)
+          return Res;
+      }
+    }
   }
 
   return Res;
 }
 
-int getIntMatCost(const APInt &Val, unsigned Size, bool IsRV64) {
+int getIntMatCost(const APInt &Val, unsigned Size,
+                  const FeatureBitset &ActiveFeatures) {
+  bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
   int PlatRegSize = IsRV64 ? 64 : 32;
 
   // Split the constant into platform register sized chunks, and calculate cost
@@ -120,7 +152,7 @@
   int Cost = 0;
   for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
     APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
-    InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), IsRV64);
+    InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), ActiveFeatures);
     Cost += MatSeq.size();
   }
   return std::max(1, Cost);
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -112,8 +112,10 @@
 }
 
 static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm,
-                         MVT XLenVT) {
-  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, XLenVT == MVT::i64);
+                         const RISCVSubtarget &Subtarget) {
+  MVT XLenVT = Subtarget.getXLenVT();
+  RISCVMatInt::InstSeq Seq =
+      RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
 
   SDNode *Result = nullptr;
   SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
@@ -121,6 +123,9 @@
     SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT);
     if (Inst.Opc == RISCV::LUI)
       Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm);
+    else if (Inst.Opc == RISCV::ADDUW)
+      Result = CurDAG->getMachineNode(RISCV::ADDUW, DL, XLenVT, SrcReg,
+                                      CurDAG->getRegister(RISCV::X0, XLenVT));
     else
       Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm);
 
@@ -454,7 +459,8 @@
       ReplaceNode(Node, New.getNode());
       return;
     }
-    ReplaceNode(Node, selectImm(CurDAG, DL, ConstNode->getSExtValue(), XLenVT));
+    ReplaceNode(Node,
+                selectImm(CurDAG, DL, ConstNode->getSExtValue(), *Subtarget));
     return;
   }
   case ISD::FrameIndex: {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6239,9 +6239,9 @@
     // Neither constant will fit into an immediate, so find materialisation
     // costs.
     int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
-                                            Subtarget.is64Bit());
+                                            Subtarget.getFeatureBits());
     int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
-        ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
+        ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits());
 
     // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
     // combine should be prevented.
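Each emission site in this patch (the asm parser, selectImm above, and RISCVInstrInfo below) special-cases ADDUW because, unlike every other entry in an InstSeq, its final operand is the register x0 rather than an immediate. The Zba semantics being relied on, paraphrased as a plain C++ model rather than LLVM code: add.uw rd, rs1, rs2 adds rs2 to the zero-extended low 32 bits of rs1, so add.uw rd, rs1, x0 is exactly zext.w rd, rs1:

#include <cassert>
#include <cstdint>

// add.uw rd, rs1, rs2: rs2 + ZExt(rs1[31:0])
static uint64_t addUW(uint64_t Rs1, uint64_t Rs2) {
  return Rs2 + (Rs1 & 0xffffffffULL);
}

int main() {
  // zext.w of the ADDI result -2 (all ones except bit 0) yields 0xfffffffe,
  // the constant the new tests below materialize in two instructions.
  assert(addUW(static_cast<uint64_t>(-2), 0) == 0xfffffffeULL);
}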
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -432,16 +432,16 @@
                             MachineInstr::MIFlag Flag) const {
   MachineFunction *MF = MBB.getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
-  bool IsRV64 = MF->getSubtarget<RISCVSubtarget>().is64Bit();
   Register SrcReg = RISCV::X0;
   Register Result = MRI.createVirtualRegister(&RISCV::GPRRegClass);
   unsigned Num = 0;
 
-  if (!IsRV64 && !isInt<32>(Val))
+  if (!STI.is64Bit() && !isInt<32>(Val))
     report_fatal_error("Should only materialize 32-bit constants for RV32");
 
-  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, IsRV64);
-  assert(Seq.size() > 0);
+  RISCVMatInt::InstSeq Seq =
+      RISCVMatInt::generateInstSeq(Val, STI.getFeatureBits());
+  assert(!Seq.empty());
 
   for (RISCVMatInt::Inst &Inst : Seq) {
     // Write the final result to DstReg if it's the last instruction in the Seq.
@@ -453,6 +453,11 @@
       BuildMI(MBB, MBBI, DL, get(RISCV::LUI), Result)
          .addImm(Inst.Imm)
          .setMIFlag(Flag);
+    } else if (Inst.Opc == RISCV::ADDUW) {
+      BuildMI(MBB, MBBI, DL, get(RISCV::ADDUW), Result)
+          .addReg(SrcReg, RegState::Kill)
+          .addReg(RISCV::X0)
+          .setMIFlag(Flag);
     } else {
       BuildMI(MBB, MBBI, DL, get(Inst.Opc), Result)
           .addReg(SrcReg, RegState::Kill)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -27,7 +27,7 @@
   // Otherwise, we check how many instructions it will take to materialise.
   const DataLayout &DL = getDataLayout();
   return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty),
-                                    getST()->is64Bit());
+                                    getST()->getFeatureBits());
 }
 
 InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
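getIntMatCost (above) prices a wide constant by slicing it into XLEN-sized, sign-extended chunks and summing each chunk's sequence length. A simplified model of that loop for constants of at most 64 bits; seqLength() is a made-up stand-in for generateInstSeq(...).size(), and its lengths are only rough guesses:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Made-up stand-in for generateInstSeq(Chunk, Features).size().
static int seqLength(int64_t Chunk) {
  if (Chunk >= -2048 && Chunk < 2048)
    return 1;                   // single ADDI
  if (Chunk == (int32_t)Chunk)
    return 2;                   // LUI + ADDI(W)
  return 3;                     // coarse guess for longer sequences
}

// Mirrors the chunking in getIntMatCost, assuming Size <= 64.
static int intMatCost(int64_t Val, unsigned Size, bool IsRV64) {
  unsigned PlatRegSize = IsRV64 ? 64 : 32;
  int Cost = 0;
  for (unsigned Shift = 0; Shift < Size; Shift += PlatRegSize) {
    int64_t Chunk = Val >> Shift;   // like APInt::ashr()
    if (PlatRegSize == 32)
      Chunk = (int32_t)Chunk;       // like sextOrTrunc(32)
    Cost += seqLength(Chunk);
  }
  return std::max(1, Cost);
}

int main() {
  printf("cost(0xaaaaaaaa, RV64) = %d\n", intMatCost(0xaaaaaaaaLL, 64, true));
}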
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -886,3 +886,50 @@
   %c = mul i64 %a, 264
   ret i64 %c
 }
+
+define i64 @imm_zextw() nounwind {
+; RV64I-LABEL: imm_zextw:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a0, zero, 1
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    addi a0, a0, -2
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: imm_zextw:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    addi a0, zero, -2
+; RV64IB-NEXT:    zext.w a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBA-LABEL: imm_zextw:
+; RV64IBA:       # %bb.0:
+; RV64IBA-NEXT:    addi a0, zero, -2
+; RV64IBA-NEXT:    zext.w a0, a0
+; RV64IBA-NEXT:    ret
+  ret i64 4294967294 ; -2 in 32 bits.
+}
+
+define i64 @imm_zextw2() nounwind {
+; RV64I-LABEL: imm_zextw2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 171
+; RV64I-NEXT:    addiw a0, a0, -1365
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    addi a0, a0, -1366
+; RV64I-NEXT:    ret
+;
+; RV64IB-LABEL: imm_zextw2:
+; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    lui a0, 699051
+; RV64IB-NEXT:    addiw a0, a0, -1366
+; RV64IB-NEXT:    zext.w a0, a0
+; RV64IB-NEXT:    ret
+;
+; RV64IBA-LABEL: imm_zextw2:
+; RV64IBA:       # %bb.0:
+; RV64IBA-NEXT:    lui a0, 699051
+; RV64IBA-NEXT:    addiw a0, a0, -1366
+; RV64IBA-NEXT:    zext.w a0, a0
+; RV64IBA-NEXT:    ret
+  ret i64 2863311530 ; 0xAAAAAAAA
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
--- a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
@@ -240,9 +240,8 @@
 ;
 ; RV64IB-LABEL: rol_i32_neg_constant_rhs:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    addi a1, zero, 1
-; RV64IB-NEXT:    slli a1, a1, 32
-; RV64IB-NEXT:    addi a1, a1, -2
+; RV64IB-NEXT:    addi a1, zero, -2
+; RV64IB-NEXT:    zext.w a1, a1
 ; RV64IB-NEXT:    rolw a0, a1, a0
 ; RV64IB-NEXT:    ret
 ;
@@ -370,9 +369,8 @@
 ;
 ; RV64IB-LABEL: ror_i32_neg_constant_rhs:
 ; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    addi a1, zero, 1
-; RV64IB-NEXT:    slli a1, a1, 32
-; RV64IB-NEXT:    addi a1, a1, -2
+; RV64IB-NEXT:    addi a1, zero, -2
+; RV64IB-NEXT:    zext.w a1, a1
 ; RV64IB-NEXT:    rorw a0, a1, a0
 ; RV64IB-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -420,9 +420,8 @@
 ;
 ; RV64ZBA-LABEL: uaddo.i32.constant:
 ; RV64ZBA:       # %bb.0: # %entry
-; RV64ZBA-NEXT:    addi a2, zero, 1
-; RV64ZBA-NEXT:    slli a2, a2, 32
-; RV64ZBA-NEXT:    addi a3, a2, -2
+; RV64ZBA-NEXT:    addi a2, zero, -2
+; RV64ZBA-NEXT:    zext.w a3, a2
 ; RV64ZBA-NEXT:    addw a2, a0, a3
 ; RV64ZBA-NEXT:    sext.w a4, a0
 ; RV64ZBA-NEXT:    sltu a2, a2, a4
@@ -758,9 +757,8 @@
 ;
 ; RV64ZBA-LABEL: usubo.i32.constant.lhs:
 ; RV64ZBA:       # %bb.0: # %entry
-; RV64ZBA-NEXT:    addi a2, zero, 1
-; RV64ZBA-NEXT:    slli a2, a2, 32
-; RV64ZBA-NEXT:    addi a3, a2, -2
+; RV64ZBA-NEXT:    addi a2, zero, -2
+; RV64ZBA-NEXT:    zext.w a3, a2
 ; RV64ZBA-NEXT:    subw a2, a3, a0
 ; RV64ZBA-NEXT:    addi a2, a2, 1
 ; RV64ZBA-NEXT:    seqz a2, a2
diff --git a/llvm/test/MC/RISCV/rv64b-aliases-valid.s b/llvm/test/MC/RISCV/rv64b-aliases-valid.s
--- a/llvm/test/MC/RISCV/rv64b-aliases-valid.s
+++ b/llvm/test/MC/RISCV/rv64b-aliases-valid.s
@@ -362,3 +362,17 @@
 # CHECK-S-OBJ-NOALIAS: gorciw t0, t1, 13
 # CHECK-S-OBJ: gorciw t0, t1, 13
 gorcw x5, x6, 13
+
+# CHECK-S-OBJ-NOALIAS: addi t1, zero, -2
+# CHECK-S-OBJ-NOALIAS-NEXT: add.uw t1, t1, zero
+# CHECK-S-OBJ: addi t1, zero, -2
+# CHECK-S-OBJ-NEXT: zext.w t1, t1
+li x6, 0xfffffffe
+
+# CHECK-S-OBJ-NOALIAS: lui t2, 699051
+# CHECK-S-OBJ-NOALIAS-NEXT: addiw t2, t2, -1366
+# CHECK-S-OBJ-NOALIAS-NEXT: add.uw t2, t2, zero
+# CHECK-S-OBJ: lui t2, 699051
+# CHECK-S-OBJ-NEXT: addiw t2, t2, -1366
+# CHECK-S-OBJ-NEXT: zext.w t2, t2
+li x7, 0xaaaaaaaa
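A quick replay of the RV64IB expansion checked above, to see the arithmetic line up: lui a0, 699051 leaves a sign-extended 0xAAAAB000 in a0, addiw subtracts 1366 to reach a sign-extended 0xAAAAAAAA, and zext.w clears the copied upper bits. A throwaway C++ check of that arithmetic, not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  int64_t A0 = (int32_t)(699051u << 12);     // lui a0, 699051   -> 0x...AAAAB000
  A0 = (int32_t)(A0 - 1366);                 // addiw a0, a0, -1366 -> 0x...AAAAAAAA
  uint64_t R = (uint64_t)A0 & 0xffffffffULL; // zext.w (add.uw a0, a0, x0)
  assert(R == 0xaaaaaaaaULL);                // the i64 2863311530 the test returns
  return 0;
}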