Index: llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp =================================================================== --- llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -2977,35 +2977,11 @@ void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value, MCStreamer &Out) { - RISCVMatInt::InstSeq Seq = - RISCVMatInt::generateInstSeq(Value, getSTI().getFeatureBits()); - - MCRegister SrcReg = RISCV::X0; - for (const RISCVMatInt::Inst &Inst : Seq) { - switch (Inst.getOpndKind()) { - case RISCVMatInt::Imm: - emitToStreamer(Out, - MCInstBuilder(Inst.getOpcode()).addReg(DestReg).addImm(Inst.getImm())); - break; - case RISCVMatInt::RegX0: - emitToStreamer( - Out, MCInstBuilder(Inst.getOpcode()).addReg(DestReg).addReg(SrcReg).addReg( - RISCV::X0)); - break; - case RISCVMatInt::RegReg: - emitToStreamer( - Out, MCInstBuilder(Inst.getOpcode()).addReg(DestReg).addReg(SrcReg).addReg( - SrcReg)); - break; - case RISCVMatInt::RegImm: - emitToStreamer( - Out, MCInstBuilder(Inst.getOpcode()).addReg(DestReg).addReg(SrcReg).addImm( - Inst.getImm())); - break; - } + SmallVector<MCInst> Seq = + RISCVMatInt::generateMCInstSeq(Value, getSTI().getFeatureBits(), DestReg); - // Only the first instruction has X0 as its source. 
- SrcReg = DestReg; + for (MCInst &Inst : Seq) { + emitToStreamer(Out, Inst); } } Index: llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h =================================================================== --- llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h +++ llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h @@ -10,6 +10,8 @@ #define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_MATINT_H #include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/TargetParser/SubtargetFeature.h" #include <cstdint> @@ -56,6 +58,11 @@ InstSeq generateTwoRegInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures, unsigned &ShiftAmt, unsigned &AddOpc); +// Helper to generate the generateInstSeq instruction sequence using MCInsts +SmallVector<MCInst> generateMCInstSeq(int64_t Val, + const FeatureBitset &ActiveFeatures, + MCRegister DestReg); + // Helper to estimate the number of instructions required to materialise the // given immediate value into a register. This estimate does not account for // `Val` possibly fitting into an immediate, and so may over-estimate. 
Index: llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp =================================================================== --- llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp +++ llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp @@ -9,6 +9,7 @@ #include "RISCVMatInt.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "llvm/ADT/APInt.h" +#include "llvm/MC/MCInstBuilder.h" #include "llvm/Support/MathExtras.h" using namespace llvm; @@ -471,6 +472,47 @@ return RISCVMatInt::InstSeq(); } +SmallVector<MCInst> generateMCInstSeq(int64_t Val, + const FeatureBitset &ActiveFeatures, + MCRegister DestReg) { + RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, ActiveFeatures); + + SmallVector<MCInst> instructions; + + MCRegister SrcReg = RISCV::X0; + for (RISCVMatInt::Inst &Inst : Seq) { + switch (Inst.getOpndKind()) { + case RISCVMatInt::Imm: + instructions.push_back(MCInstBuilder(Inst.getOpcode()) + .addReg(DestReg) + .addImm(Inst.getImm())); + break; + case RISCVMatInt::RegX0: + instructions.push_back(MCInstBuilder(Inst.getOpcode()) + .addReg(DestReg) + .addReg(SrcReg) + .addReg(RISCV::X0)); + break; + case RISCVMatInt::RegReg: + instructions.push_back(MCInstBuilder(Inst.getOpcode()) + .addReg(DestReg) + .addReg(SrcReg) + .addReg(SrcReg)); + break; + case RISCVMatInt::RegImm: + instructions.push_back(MCInstBuilder(Inst.getOpcode()) + .addReg(DestReg) + .addReg(SrcReg) + .addImm(Inst.getImm())); + break; + } + + // Only the first instruction has X0 as its source. 
+ SrcReg = DestReg; + } + return instructions; +} + int getIntMatCost(const APInt &Val, unsigned Size, const FeatureBitset &ActiveFeatures, bool CompressionCost) { bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit]; Index: llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -14,6 +14,7 @@ #include "MCTargetDesc/RISCVBaseInfo.h" #include "MCTargetDesc/RISCVInstPrinter.h" #include "MCTargetDesc/RISCVMCExpr.h" +#include "MCTargetDesc/RISCVMatInt.h" #include "MCTargetDesc/RISCVTargetStreamer.h" #include "RISCV.h" #include "RISCVMachineFunctionInfo.h" @@ -152,8 +153,35 @@ PatchPointOpers Opers(&MI); + const MachineOperand &CalleeMO = Opers.getCallTarget(); unsigned EncodedBytes = 0; + if (CalleeMO.isImm()) { + uint64_t CallTarget = CalleeMO.getImm(); + if (CallTarget) { + assert((CallTarget & 0xFFFF'FFFF'FFFF) == CallTarget && + "High 16 bits of call target should be zero."); + // Materialize the jump address: + SmallVector<MCInst> Seq = RISCVMatInt::generateMCInstSeq( + CallTarget, STI->getFeatureBits(), RISCV::X1); + for (MCInst &Inst : Seq) { + EmitToStreamer(OutStreamer, Inst); + } + EncodedBytes += Seq.size() * 4; + EmitToStreamer(OutStreamer, MCInstBuilder(RISCV::JALR) + .addReg(RISCV::X1) + .addReg(RISCV::X1) + .addImm(0)); + EncodedBytes += 4; + } + } else if (CalleeMO.isGlobal()) { + MCOperand CallTargetMCOp; + lowerOperand(CalleeMO, CallTargetMCOp); + EmitToStreamer(OutStreamer, + MCInstBuilder(RISCV::PseudoCALL).addOperand(CallTargetMCOp)); + EncodedBytes += 8; + } + // Emit padding. 
unsigned NumBytes = Opers.getNumPatchBytes(); assert(NumBytes >= EncodedBytes && @@ -172,6 +200,35 @@ assert(PatchBytes % NOPBytes == 0 && "Invalid number of NOP bytes requested!"); emitNops(PatchBytes / NOPBytes); + } else { + // Lower call target and choose correct opcode + const MachineOperand &CallTarget = SOpers.getCallTarget(); + MCOperand CallTargetMCOp; + switch (CallTarget.getType()) { + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + lowerOperand(CallTarget, CallTargetMCOp); + EmitToStreamer( + OutStreamer, + MCInstBuilder(RISCV::PseudoCALL).addOperand(CallTargetMCOp)); + break; + case MachineOperand::MO_Immediate: + CallTargetMCOp = MCOperand::createImm(CallTarget.getImm()); + EmitToStreamer(OutStreamer, MCInstBuilder(RISCV::JAL) + .addReg(RISCV::X1) + .addOperand(CallTargetMCOp)); + break; + case MachineOperand::MO_Register: + CallTargetMCOp = MCOperand::createReg(CallTarget.getReg()); + EmitToStreamer(OutStreamer, MCInstBuilder(RISCV::JALR) + .addReg(RISCV::X1) + .addOperand(CallTargetMCOp) + .addImm(0)); + break; + default: + llvm_unreachable("Unsupported operand type in statepoint call target"); + break; + } } auto &Ctx = OutStreamer.getContext(); Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -16229,6 +16229,17 @@ case RISCV::PseudoFROUND_D_IN32X: return emitFROUND(MI, BB, Subtarget); case TargetOpcode::STATEPOINT: + // STATEPOINT is a pseudo instruction which has no implicit defs/uses + // while jal call instruction (where statepoint will be lowered at the end) + // has implicit def. This def is early-clobber as it will be set at + // the moment of the call and earlier than any use is read. + // Add this implicit dead def here as a workaround. 
+ MI.addOperand(*MI.getMF(), + MachineOperand::CreateReg( + RISCV::X1, /*isDef*/ true, + /*isImp*/ true, /*isKill*/ false, /*isDead*/ true, + /*isUndef*/ false, /*isEarlyClobber*/ true)); + [[fallthrough]]; case TargetOpcode::STACKMAP: case TargetOpcode::PATCHPOINT: if (!Subtarget.is64Bit()) Index: llvm/lib/Target/RISCV/RISCVInstrInfo.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1310,9 +1310,14 @@ case TargetOpcode::PATCHPOINT: // The size of the patchpoint intrinsic is the number of bytes requested return PatchPointOpers(&MI).getNumPatchBytes(); - case TargetOpcode::STATEPOINT: + case TargetOpcode::STATEPOINT: { // The size of the statepoint intrinsic is the number of bytes requested - return StatepointOpers(&MI).getNumPatchBytes(); + unsigned NumBytes = StatepointOpers(&MI).getNumPatchBytes(); + // A statepoint is at least a PseudoCALL + if (NumBytes < 8) + NumBytes = 8; + return NumBytes; + } default: return get(Opcode).getSize(); } Index: llvm/test/CodeGen/RISCV/rv64-patchpoint.ll =================================================================== --- llvm/test/CodeGen/RISCV/rv64-patchpoint.ll +++ llvm/test/CodeGen/RISCV/rv64-patchpoint.ll @@ -1,12 +1,56 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -debug-entry-values -enable-misched=0 < %s | FileCheck %s +; Trivial patchpoint codegen +; +define i64 @trivial_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { +; CHECK-LABEL: trivial_patchpoint_codegen: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: sd s0, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 0(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset s0, -8 +; CHECK-NEXT: .cfi_offset s1, -16 +; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: lui ra, 3563 +; CHECK-NEXT: addiw ra, ra, -577 +; 
CHECK-NEXT: slli ra, ra, 12 +; CHECK-NEXT: addi ra, ra, -259 +; CHECK-NEXT: slli ra, ra, 12 +; CHECK-NEXT: addi ra, ra, -1282 +; CHECK-NEXT: jalr ra +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a1, s1 +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: lui ra, 3563 +; CHECK-NEXT: addiw ra, ra, -577 +; CHECK-NEXT: slli ra, ra, 12 +; CHECK-NEXT: addi ra, ra, -259 +; CHECK-NEXT: slli ra, ra, 12 +; CHECK-NEXT: addi ra, ra, -1281 +; CHECK-NEXT: jalr ra +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: ld s0, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret +entry: + %resolveCall2 = inttoptr i64 244837814094590 to i8* + %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 28, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4) + %resolveCall3 = inttoptr i64 244837814094591 to i8* + tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 3, i32 28, i8* %resolveCall3, i32 2, i64 %p1, i64 %result) + ret i64 %result +} + ; Test small patchpoints that don't emit calls. define void @small_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { ; CHECK-LABEL: small_patchpoint_codegen: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: .Ltmp2: ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop