Index: llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
===================================================================
--- llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -153,7 +153,12 @@
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
   DebugLoc DL;
-
+
+  // All calls are tail calls in GHC calling conv, and functions have no
+  // prologue/epilogue.
+  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
+    return;
+
   // Determine the correct frame layout
   determineFrameLayout(MF);
 
@@ -284,7 +289,12 @@
   MachineFrameInfo &MFI = MF.getFrameInfo();
   auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
   Register SPReg = LoongArch::R3;
-
+
+  // All calls are tail calls in GHC calling conv, and functions have no
+  // prologue/epilogue.
+  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
+    return;
+
   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
   DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
 
Index: llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
===================================================================
--- llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -426,16 +426,30 @@
   SDLoc DL(N);
   EVT Ty = getPointerTy(DAG.getDataLayout());
   SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
-  // TODO: Check CodeModel.
-  if (IsLocal)
+
+  if (isPositionIndependent()) {
+    if (IsLocal)
+      // This generates the pattern (PseudoLA_PCREL sym), which expands to
+      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
+      return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr),
+                     0);
+
+    // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
+    // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
+    return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
+  }
+
+  switch (getTargetMachine().getCodeModel()) {
+  default:
+    report_fatal_error("Unsupported code model for lowering");
+  case CodeModel::Small:
+  case CodeModel::Medium: {
     // This generates the pattern (PseudoLA_PCREL sym), which expands to
     // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
     return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr),
-                 0);
-
-  // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
-  // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
-  return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
+                 0);
+  }
+  }
 }
 
 SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
@@ -513,6 +527,10 @@
   SDValue Addr;
   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
 
+  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
+      CallingConv::GHC)
+    report_fatal_error("In GHC calling convention TLS is not supported");
+
   switch (Model) {
   case TLSModel::GeneralDynamic:
     // In this model, application code calls the dynamic linker function
@@ -1608,6 +1626,47 @@
   return Val;
 }
 
+static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
+                             CCValAssign::LocInfo LocInfo,
+                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
+  if (LocVT == MVT::i32 || LocVT == MVT::i64) {
+    // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
+    //                        s0   s1  s2  s3  s4  s5  s6  s7  s8
+    static const MCPhysReg GPRList[] = {
+        LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26, LoongArch::R27,
+        LoongArch::R28, LoongArch::R29, LoongArch::R30, LoongArch::R31};
+    if (unsigned Reg = State.AllocateReg(GPRList)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+  }
+
+  if (LocVT == MVT::f32) {
+    // Pass in STG registers: F1, F2, F3, F4
+    //                        fs0,fs1,fs2,fs3
+    static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
+                                          LoongArch::F26, LoongArch::F27};
+    if (unsigned Reg = State.AllocateReg(FPR32List)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+  }
+
+  if (LocVT == MVT::f64) {
+    // Pass in STG registers: D1, D2, D3, D4
+    //                        fs4,fs5,fs6,fs7
+    static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
+                                          LoongArch::F30_64, LoongArch::F31_64};
+    if (unsigned Reg = State.AllocateReg(FPR64List)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+  }
+
+  report_fatal_error("No registers left in GHC calling convention");
+  return true;
+}
+
 // Transform physical registers into virtual registers.
 SDValue LoongArchTargetLowering::LowerFormalArguments(
     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
@@ -1622,6 +1681,11 @@
   case CallingConv::C:
   case CallingConv::Fast:
     break;
+  case CallingConv::GHC:
+    if (!MF.getSubtarget().getFeatureBits()[LoongArch::FeatureBasicF] ||
+        !MF.getSubtarget().getFeatureBits()[LoongArch::FeatureBasicD])
+      report_fatal_error(
+          "GHC calling convention requires the F and D extensions");
   }
 
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -1634,7 +1698,10 @@
   SmallVector<CCValAssign> ArgLocs;
   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
 
-  analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
+  if (CallConv == CallingConv::GHC)
+    CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
+  else
+    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
 
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
@@ -1760,7 +1827,10 @@
   SmallVector<CCValAssign> ArgLocs;
   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
 
-  analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
+  if (CallConv == CallingConv::GHC)
+    ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
+  else
+    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
 
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
@@ -1983,7 +2053,10 @@
   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                     nullptr, CC_LoongArch);
-
+
+  if (CallConv == CallingConv::GHC && !RVLocs.empty())
+    report_fatal_error("GHC functions return void only");
+
   SDValue Glue;
   SmallVector<SDValue, 4> RetOps(1, Chain);
 
Index: llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
===================================================================
--- llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
+++ llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
@@ -38,6 +38,8 @@
 LoongArchRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   auto &Subtarget = MF->getSubtarget<LoongArchSubtarget>();
 
+  if (MF->getFunction().getCallingConv() == CallingConv::GHC)
+    return CSR_NoRegs_SaveList;
   switch (Subtarget.getTargetABI()) {
   default:
     llvm_unreachable("Unrecognized ABI");
@@ -58,6 +60,8 @@
                                           CallingConv::ID CC) const {
   auto &Subtarget = MF.getSubtarget<LoongArchSubtarget>();
 
+  if (CC == CallingConv::GHC)
+    return CSR_NoRegs_RegMask;
   switch (Subtarget.getTargetABI()) {
   default:
     llvm_unreachable("Unrecognized ABI");
Index: llvm/test/CodeGen/LoongArch/ghc-cc.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/LoongArch/ghc-cc.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+f,+d < %s | FileCheck %s --check-prefix=LA64
+
+; Check the GHC call convention works (la64)
+
+@base = external global i64 ; assigned to register: s0
+@sp = external global i64 ; assigned to register: s1
+@hp = external global i64 ; assigned to register: s2
+@r1 = external global i64 ; assigned to register: s3
+@r2 = external global i64 ; assigned to register: s4
+@r3 = external global i64 ; assigned to register: s5
+@r4 = external global i64 ; assigned to register: s6
+@r5 = external global i64 ; assigned to register: s7
+@splim = external global i64 ; assigned to register: s8
+
+@f1 = external global float ; assigned to register: fs0
+@f2 = external global float ; assigned to register: fs1
+@f3 = external global float ; assigned to register: fs2
+@f4 = external global float ; assigned to register: fs3
+
+@d1 = external global double ; assigned to register: fs4
+@d2 = external global double ; assigned to register: fs5
+@d3 = external global double ; assigned to register: fs6
+@d4 = external global double ; assigned to register: fs7
+
+define ghccc void @foo() nounwind {
+; LA64-LABEL: foo:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(base)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(base)
+; LA64-NEXT:    ld.d $s0, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(sp)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(sp)
+; LA64-NEXT:    ld.d $s1, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(hp)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(hp)
+; LA64-NEXT:    ld.d $s2, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r1)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r1)
+; LA64-NEXT:    ld.d $s3, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r2)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r2)
+; LA64-NEXT:    ld.d $s4, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r3)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r3)
+; LA64-NEXT:    ld.d $s5, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r4)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r4)
+; LA64-NEXT:    ld.d $s6, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(r5)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(r5)
+; LA64-NEXT:    ld.d $s7, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(splim)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(splim)
+; LA64-NEXT:    ld.d $s8, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f1)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f1)
+; LA64-NEXT:    fld.s $fs0, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f2)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f2)
+; LA64-NEXT:    fld.s $fs1, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f3)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f3)
+; LA64-NEXT:    fld.s $fs2, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(f4)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(f4)
+; LA64-NEXT:    fld.s $fs3, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d1)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d1)
+; LA64-NEXT:    fld.d $fs4, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d2)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d2)
+; LA64-NEXT:    fld.d $fs5, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d3)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d3)
+; LA64-NEXT:    fld.d $fs6, $a0, 0
+; LA64-NEXT:    pcalau12i $a0, %pc_hi20(d4)
+; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(d4)
+; LA64-NEXT:    fld.d $fs7, $a0, 0
+; LA64-NEXT:    bl %plt(bar)
+; LA64-NEXT:    ret
+
+entry:
+  %0 = load double, ptr @d4
+  %1 = load double, ptr @d3
+  %2 = load double, ptr @d2
+  %3 = load double, ptr @d1
+  %4 = load float, ptr @f4
+  %5 = load float, ptr @f3
+  %6 = load float, ptr @f2
+  %7 = load float, ptr @f1
+  %8 = load i64, ptr @splim
+  %9 = load i64, ptr @r5
+  %10 = load i64, ptr @r4
+  %11 = load i64, ptr @r3
+  %12 = load i64, ptr @r2
+  %13 = load i64, ptr @r1
+  %14 = load i64, ptr @hp
+  %15 = load i64, ptr @sp
+  %16 = load i64, ptr @base
+  tail call ghccc void @bar(i64 %16, i64 %15, i64 %14, i64 %13, i64 %12,
+                            i64 %11, i64 %10, i64 %9, i64 %8, float %7, float %6,
+                            float %5, float %4, double %3, double %2, double %1, double %0) nounwind
+  ret void
+}
+
+declare ghccc void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64,
+                        float, float, float, float,
+                        double, double, double, double)