diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
@@ -31,6 +31,12 @@
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
 
+  MachineBasicBlock::iterator
+  eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MI) const override {
+    return MBB.erase(MI);
+  }
+
   bool hasFP(const MachineFunction &MF) const override;
   bool hasBP(const MachineFunction &MF) const;
 };
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -27,6 +27,7 @@
   FIRST_NUMBER = ISD::BUILTIN_OP_END,
 
   // TODO: add more LoongArchISDs
+  CALL,
   RET,
   // 32-bit shifts, directly matching the semantics of the named LoongArch
   // instructions.
@@ -72,6 +73,8 @@
                       const SmallVectorImpl<ISD::OutputArg> &Outs,
                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                       SelectionDAG &DAG) const override;
+  SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+                    SmallVectorImpl<SDValue> &InVals) const override;
 
 private:
   /// Target-specific function used to lower LoongArch calling conventions.
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -397,6 +397,7 @@
     return "LoongArchISD::" #node;
 
     // TODO: Add more target-dependent nodes later.
+    NODE_NAME_CASE(CALL)
     NODE_NAME_CASE(RET)
     NODE_NAME_CASE(SLL_W)
     NODE_NAME_CASE(SRA_W)
@@ -511,6 +512,132 @@
   return Chain;
 }
 
+// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
+// and output parameter nodes.
+SDValue
+LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
+                                   SmallVectorImpl<SDValue> &InVals) const {
+  SelectionDAG &DAG = CLI.DAG;
+  SDLoc &DL = CLI.DL;
+  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+  SDValue Chain = CLI.Chain;
+  SDValue Callee = CLI.Callee;
+  CallingConv::ID CallConv = CLI.CallConv;
+  bool IsVarArg = CLI.IsVarArg;
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  CLI.IsTailCall = false;
+
+  if (IsVarArg)
+    report_fatal_error("LowerCall with varargs not implemented");
+
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  // Analyze the operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign> ArgLocs;
+  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+
+  analyzeOutputArgs(ArgCCInfo, Outs, CC_LoongArch);
+
+  // Get a count of how many bytes are to be pushed on the stack.
+  unsigned NumBytes = ArgCCInfo.getNextStackOffset();
+
+  for (auto &Arg : Outs) {
+    if (!Arg.Flags.isByVal())
+      continue;
+    report_fatal_error("Passing arguments byval not implemented");
+  }
+
+  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
+
+  // Copy argument values to their designated locations.
+  SmallVector<std::pair<Register, SDValue>> RegsToPass;
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    SDValue ArgValue = OutVals[i];
+
+    // Promote the value if needed.
+    // For now, only handle fully promoted arguments.
+    if (VA.getLocInfo() != CCValAssign::Full)
+      report_fatal_error("Unknown loc info");
+
+    if (VA.isRegLoc()) {
+      // Queue up the argument copies and emit them at the end.
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
+    } else {
+      report_fatal_error("Passing arguments via the stack not implemented");
+    }
+  }
+
+  SDValue Glue;
+
+  // Build a sequence of copy-to-reg nodes, chained and glued together.
+  for (auto &Reg : RegsToPass) {
+    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
+    Glue = Chain.getValue(1);
+  }
+
+  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
+  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
+  // split it, and so that the direct call can be matched by PseudoCALL.
+  // FIXME: Add target flags for relocation.
+  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee))
+    Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT);
+  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT);
+
+  // The first call operand is the chain and the second is the target address.
+  SmallVector<SDValue> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+
+  // Add argument registers to the end of the list so that they are
+  // known live into the call.
+  for (auto &Reg : RegsToPass)
+    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
+
+  // Add a register mask operand representing the call-preserved registers.
+  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
+  assert(Mask && "Missing call preserved mask for calling convention");
+  Ops.push_back(DAG.getRegisterMask(Mask));
+
+  // Glue the call to the argument copies, if any.
+  if (Glue.getNode())
+    Ops.push_back(Glue);
+
+  // Emit the call.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+  Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops);
+  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
+  Glue = Chain.getValue(1);
+
+  // Mark the end of the call, which is glued to the call itself.
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, DL, PtrVT, true),
+                             DAG.getConstant(0, DL, PtrVT, true), Glue, DL);
+  Glue = Chain.getValue(1);
+
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign> RVLocs;
+  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
+  analyzeInputArgs(RetCCInfo, Ins, CC_LoongArch);
+
+  // Copy all of the result registers out of their specified physreg.
+  for (auto &VA : RVLocs) {
+    // Copy the value out.
+    SDValue RetValue =
+        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
+    Chain = RetValue.getValue(1);
+    Glue = RetValue.getValue(2);
+
+    InVals.push_back(Chain.getValue(0));
+  }
+
+  return Chain;
+}
+
 bool LoongArchTargetLowering::CanLowerReturn(
     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -30,6 +30,16 @@
   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg,
                    bool KillSrc) const override;
+
+  void storeRegToStackSlot(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI, Register SrcReg,
+                           bool IsKill, int FrameIndex,
+                           const TargetRegisterClass *RC,
+                           const TargetRegisterInfo *TRI) const override;
+  void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MBBI, Register DstReg,
+                            int FrameIndex, const TargetRegisterClass *RC,
+                            const TargetRegisterInfo *TRI) const override;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -12,6 +12,7 @@
 
 #include "LoongArchInstrInfo.h"
 #include "LoongArch.h"
+#include "LoongArchMachineFunctionInfo.h"
 
 using namespace llvm;
 
@@ -19,8 +20,8 @@
 #include "LoongArchGenInstrInfo.inc"
 
 LoongArchInstrInfo::LoongArchInstrInfo(LoongArchSubtarget &STI)
-    // FIXME: add CFSetup and CFDestroy Inst when we implement function call.
-    : LoongArchGenInstrInfo() {}
+    : LoongArchGenInstrInfo(LoongArch::ADJCALLSTACKDOWN,
+                            LoongArch::ADJCALLSTACKUP) {}
 
 void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
@@ -47,3 +48,68 @@
   BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
       .addReg(SrcReg, getKillRegState(KillSrc));
 }
+
+void LoongArchInstrInfo::storeRegToStackSlot(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg,
+    bool IsKill, int FI, const TargetRegisterClass *RC,
+    const TargetRegisterInfo *TRI) const {
+  DebugLoc DL;
+  if (I != MBB.end())
+    DL = I->getDebugLoc();
+  MachineFunction *MF = MBB.getParent();
+  MachineFrameInfo &MFI = MF->getFrameInfo();
+
+  unsigned Opcode;
+  if (LoongArch::GPRRegClass.hasSubClassEq(RC))
+    Opcode = TRI->getRegSizeInBits(LoongArch::GPRRegClass) == 32
+                 ? LoongArch::ST_W
+                 : LoongArch::ST_D;
+  else if (LoongArch::FPR32RegClass.hasSubClassEq(RC))
+    Opcode = LoongArch::FST_S;
+  else if (LoongArch::FPR64RegClass.hasSubClassEq(RC))
+    Opcode = LoongArch::FST_D;
+  else
+    llvm_unreachable("Can't store this register to stack slot");
+
+  MachineMemOperand *MMO = MF->getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
+      MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
+
+  BuildMI(MBB, I, DL, get(Opcode))
+      .addReg(SrcReg, getKillRegState(IsKill))
+      .addFrameIndex(FI)
+      .addImm(0)
+      .addMemOperand(MMO);
+}
+
+void LoongArchInstrInfo::loadRegFromStackSlot(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DstReg,
+    int FI, const TargetRegisterClass *RC,
+    const TargetRegisterInfo *TRI) const {
+  DebugLoc DL;
+  if (I != MBB.end())
+    DL = I->getDebugLoc();
+  MachineFunction *MF = MBB.getParent();
+  MachineFrameInfo &MFI = MF->getFrameInfo();
+
+  unsigned Opcode;
+  if (LoongArch::GPRRegClass.hasSubClassEq(RC))
+    Opcode = TRI->getRegSizeInBits(LoongArch::GPRRegClass) == 32
+                 ? LoongArch::LD_W
+                 : LoongArch::LD_D;
+  else if (LoongArch::FPR32RegClass.hasSubClassEq(RC))
+    Opcode = LoongArch::FLD_S;
+  else if (LoongArch::FPR64RegClass.hasSubClassEq(RC))
+    Opcode = LoongArch::FLD_D;
+  else
+    llvm_unreachable("Can't load this register from stack slot");
+
+  MachineMemOperand *MMO = MF->getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
+      MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
+
+  BuildMI(MBB, I, DL, get(Opcode), DstReg)
+      .addFrameIndex(FI)
+      .addImm(0)
+      .addMemOperand(MMO);
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -14,7 +14,14 @@
 // LoongArch specific DAG Nodes.
 //===----------------------------------------------------------------------===//
 
+// Target-independent type requirements, but with target-specific formats.
+def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
+                                       SDTCisVT<1, i32>]>;
+def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
+                                   SDTCisVT<1, i32>]>;
+
 // Target-dependent type requirements.
+def SDT_LoongArchCall : SDTypeProfile<0, -1, [SDTCisVT<0, GRLenVT>]>;
 def SDT_LoongArchIntBinOpW : SDTypeProfile<1, 2, [
   SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>
 ]>;
@@ -24,7 +31,16 @@
 ]>;
 
 // TODO: Add LoongArch specific DAG Nodes
+// Target-independent nodes, but with target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart,
+                           [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd,
+                         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
 // Target-dependent nodes.
+def loongarch_call : SDNode<"LoongArchISD::CALL", SDT_LoongArchCall,
+                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                             SDNPVariadic]>;
 def loongarch_ret : SDNode<"LoongArchISD::RET", SDTNone,
                            [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
 def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>;
@@ -148,6 +164,17 @@
   N->getValueType(0));
 }]>;
 
+def CallSymbol: AsmOperandClass {
+  let Name = "CallSymbol";
+  let RenderMethod = "addImmOperands";
+  let PredicateMethod = "isImm";
+}
+
+// A bare symbol used only in calls.
+def call_symbol : Operand<GRLenVT> {
+  let ParserMatchClass = CallSymbol;
+}
+
 //===----------------------------------------------------------------------===//
 // Instruction Formats
 //===----------------------------------------------------------------------===//
@@ -694,6 +721,19 @@
 def : Pat<(brind (add GPR:$rj, simm16_lsl2:$imm16)),
           (PseudoBRIND GPR:$rj, simm16_lsl2:$imm16)>;
 
+let isCall = 1, Defs = [R1] in
+def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []> {
+  let AsmString = "bl\t$func";
+}
+
+def : Pat<(loongarch_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>;
+def : Pat<(loongarch_call texternalsym:$func), (PseudoCALL texternalsym:$func)>;
+
+let isCall = 1, Defs = [R1] in
+def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rj),
+                                [(loongarch_call GPR:$rj)]>,
+                         PseudoInstExpansion<(JIRL R1, GPR:$rj, 0)>;
+
 let isBarrier = 1, isReturn = 1, isTerminator = 1 in
 def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>,
                 PseudoInstExpansion<(JIRL R0, R1, 0)>;
@@ -748,6 +788,16 @@
 defm : StPat<store, ST_D, GPR, i64>;
 } // Predicates = [IsLA64]
 
+/// Other pseudo-instructions
+
+// Pessimistically assume the stack pointer will be clobbered.
+let Defs = [R3], Uses = [R3] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+                              [(callseq_start timm:$amt1, timm:$amt2)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+                            [(callseq_end timm:$amt1, timm:$amt2)]>;
+} // Defs = [R3], Uses = [R3]
+
 //===----------------------------------------------------------------------===//
 // Assembler Pseudo Instructions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
@@ -110,6 +110,28 @@
                                             int SPAdj, unsigned FIOperandNum,
                                             RegScavenger *RS) const {
-  // TODO: Implement this when we have function calls
+  // TODO: This implementation is a temporary placeholder which does just
+  // enough to allow other aspects of code generation to be tested.
+  assert(SPAdj == 0 && "Unexpected non-zero SPAdj value");
+
+  MachineInstr &MI = *II;
+  MachineFunction &MF = *MI.getParent()->getParent();
+  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+  DebugLoc DL = MI.getDebugLoc();
+
+  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+  Register FrameReg;
+  StackOffset Offset =
+      TFI->getFrameIndexReference(MF, FrameIndex, FrameReg) +
+      StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
+
+  // Offsets must be directly encoded in a 12-bit immediate field.
+  if (!isInt<12>(Offset.getFixed())) {
+    report_fatal_error("Frame offsets outside of the signed 12-bit range are "
+                       "not currently supported");
+  }
+
+  MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
+  MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
 }
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll
@@ -0,0 +1,79 @@
+; RUN: llc --mtriple=loongarch32 < %s | FileCheck --check-prefix=LA32 %s
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck --check-prefix=LA64 %s
+
+;; FIXME: prologue and epilogue insertion must be implemented to complete this
+;; test
+
+declare i32 @external_function(i32)
+
+define i32 @test_call_external(i32 %a) nounwind {
+; LA32-LABEL: test_call_external:
+; LA32:       # %bb.0:
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    bl external_function
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_call_external:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    bl external_function
+; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %1 = call i32 @external_function(i32 %a)
+  ret i32 %1
+}
+
+define i32 @defined_function(i32 %a) nounwind {
+; LA32-LABEL: defined_function:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $a0, $a0, 1
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: defined_function:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.d $a0, $a0, 1
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %1 = add i32 %a, 1
+  ret i32 %1
+}
+
+define i32 @test_call_defined(i32 %a) nounwind {
+; LA32-LABEL: test_call_defined:
+; LA32:       # %bb.0:
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    bl defined_function
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_call_defined:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    bl defined_function
+; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %1 = call i32 @defined_function(i32 %a) nounwind
+  ret i32 %1
+}
+
+define i32 @test_call_indirect(i32 (i32)* %a, i32 %b) nounwind {
+; LA32-LABEL: test_call_indirect:
+; LA32:       # %bb.0:
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    move $a2, $a0
+; LA32-NEXT:    move $a0, $a1
+; LA32-NEXT:    jirl $ra, $a2, 0
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: test_call_indirect:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    move $a2, $a0
+; LA64-NEXT:    move $a0, $a1
+; LA64-NEXT:    jirl $ra, $a2, 0
+; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    jirl $zero, $ra, 0
+  %1 = call i32 %a(i32 %b)
+  ret i32 %1
+}