diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
@@ -31,6 +31,9 @@
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
 
+  void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+                            RegScavenger *RS) const override;
+
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI) const override {
@@ -42,6 +45,12 @@
 
   bool hasFP(const MachineFunction &MF) const override;
   bool hasBP(const MachineFunction &MF) const;
+
+private:
+  void determineFrameLayout(MachineFunction &MF) const;
+  void adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                 const DebugLoc &DL, Register DestReg, Register SrcReg,
+                 int64_t Val, MachineInstr::MIFlag Flag) const;
 };
 } // namespace llvm
 #endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -11,7 +11,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "LoongArchFrameLowering.h"
+#include "LoongArchMachineFunctionInfo.h"
 #include "LoongArchSubtarget.h"
+#include "MCTargetDesc/LoongArchBaseInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -44,14 +46,150 @@
   return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF);
 }
 
+void LoongArchFrameLowering::adjustReg(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MBBI,
+                                       const DebugLoc &DL, Register DestReg,
+                                       Register SrcReg, int64_t Val,
+                                       MachineInstr::MIFlag Flag) const {
+  const LoongArchInstrInfo *TII = STI.getInstrInfo();
+  bool IsLA64 = STI.is64Bit();
+
+  if (DestReg == SrcReg && Val == 0)
+    return;
+
+  if (isInt<12>(Val)) {
+    // addi.w/d $DestReg, $SrcReg, Val
+    BuildMI(MBB, MBBI, DL,
+            TII->get(IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W), DestReg)
+        .addReg(SrcReg)
+        .addImm(Val)
+        .setMIFlag(Flag);
+  } else {
+    report_fatal_error("adjustReg cannot yet handle adjustments >12 bits");
+  }
+}
+
+// Round the stack size recorded in the frame info up to the stack alignment.
+void LoongArchFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  // Get the number of bytes to allocate from the FrameInfo.
+  uint64_t FrameSize = MFI.getStackSize();
+
+  // Make sure the frame is aligned.
+  FrameSize = alignTo(FrameSize, getStackAlign());
+
+  // Update frame info.
+  MFI.setStackSize(FrameSize);
+}
+
 void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
                                           MachineBasicBlock &MBB) const {
-  // TODO: Implement this when we have function calls
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
+  const LoongArchInstrInfo *TII = STI.getInstrInfo();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+
+  Register SPReg = LoongArch::R3;
+  Register FPReg = LoongArch::R22;
+
+  // Debug location must be unknown since the first debug location is used
+  // to determine the end of the prologue.
+  DebugLoc DL;
+
+  // Determine the correct frame layout (aligns the recorded stack size).
+  determineFrameLayout(MF);
+
+  // First, compute final stack size.
+  uint64_t StackSize = MFI.getStackSize();
+
+  // Early exit if there is no need to allocate space in the stack.
+  if (StackSize == 0 && !MFI.adjustsStack())
+    return;
+
+  // Allocate the frame: SP = SP - StackSize.
+  adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup);
+  // Emit ".cfi_def_cfa_offset StackSize".
+  unsigned CFIIndex =
+      MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize));
+  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex)
+      .setMIFlag(MachineInstr::FrameSetup);
+
+  const auto &CSI = MFI.getCalleeSavedInfo();
+
+  // The frame pointer is callee-saved, and code has been generated for us to
+  // save it to the stack. We need to skip over the storing of callee-saved
+  // registers as the frame pointer must be modified after it has been saved
+  // to the stack, not before.
+  std::advance(MBBI, CSI.size());
+
+  // Iterate over list of callee-saved registers and emit .cfi_offset
+  // directives.
+  for (const auto &Entry : CSI) {
+    int64_t Offset = MFI.getObjectOffset(Entry.getFrameIdx());
+    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+        nullptr, RI->getDwarfRegNum(Entry.getReg(), true), Offset));
+    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+
+  // Generate new FP: FP = SP + StackSize (SP before the allocation above).
+  if (hasFP(MF)) {
+    adjustReg(MBB, MBBI, DL, FPReg, SPReg, StackSize, MachineInstr::FrameSetup);
+
+    // Emit ".cfi_def_cfa $fp, 0".
+    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
+        nullptr, RI->getDwarfRegNum(FPReg, true), 0));
+    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
 }
 
 void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF,
                                           MachineBasicBlock &MBB) const {
-  // TODO: Implement this when we have function calls
+  const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  Register SPReg = LoongArch::R3;
+
+  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  const auto &CSI = MFI.getCalleeSavedInfo();
+  // Step back over the callee-saved restores (CSI.size() instructions).
+  auto LastFrameDestroy = MBBI;
+  if (!CSI.empty())
+    LastFrameDestroy = std::prev(MBBI, CSI.size());
+
+  // Get the number of bytes from FrameInfo.
+  uint64_t StackSize = MFI.getStackSize();
+
+  // Restore the stack pointer from FP: SP = FP - StackSize.
+  if (RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects()) {
+    assert(hasFP(MF) && "frame pointer should not have been eliminated");
+    adjustReg(MBB, LastFrameDestroy, DL, SPReg, LoongArch::R22, -StackSize,
+              MachineInstr::FrameDestroy);
+  }
+
+  // Deallocate the frame: SP = SP + StackSize.
+  adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy);
+}
+
+void LoongArchFrameLowering::determineCalleeSaves(MachineFunction &MF,
+                                                  BitVector &SavedRegs,
+                                                  RegScavenger *RS) const {
+  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+  // Unconditionally spill RA and FP only if the function uses a frame
+  // pointer.
+  if (hasFP(MF)) {
+    SavedRegs.set(LoongArch::R1);
+    SavedRegs.set(LoongArch::R22);
+  }
+  // Mark BP as used if function has dedicated base pointer.
+  if (hasBP(MF))
+    SavedRegs.set(LoongArchABI::getBPReg());
 }
 
 StackOffset LoongArchFrameLowering::getFrameIndexReference(
diff --git a/llvm/test/CodeGen/LoongArch/frame.ll b/llvm/test/CodeGen/LoongArch/frame.ll
--- a/llvm/test/CodeGen/LoongArch/frame.ll
+++ b/llvm/test/CodeGen/LoongArch/frame.ll
@@ -2,12 +2,10 @@
 
 %struct.key_t = type { i32, [16 x i8] }
 
-;; FIXME: prologue and epilogue insertion must be implemented to complete this
-;; test
-
 define i32 @test() nounwind {
 ; CHECK-LABEL: test:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -32
 ; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
 ; CHECK-NEXT: st.w $zero, $sp, 16
 ; CHECK-NEXT: st.d $zero, $sp, 8
@@ -17,6 +15,7 @@
 ; CHECK-NEXT: bl test1
 ; CHECK-NEXT: move $a0, $zero
 ; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 32
 ; CHECK-NEXT: jirl $zero, $ra, 0
   %key = alloca %struct.key_t, align 4
   %1 = bitcast %struct.key_t* %key to i8*
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll
@@ -1,24 +1,25 @@
 ; RUN: llc --mtriple=loongarch32 < %s | FileCheck --check-prefix=LA32 %s
 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck --check-prefix=LA64 %s
 
-;; FIXME: prologue and epilogue insertion must be implemented to complete this
-;; test
-
 declare i32 @external_function(i32)
 
 define i32 @test_call_external(i32 %a) nounwind {
 ; LA32-LABEL: test_call_external:
 ; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
 ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
 ; LA32-NEXT: bl external_function
 ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
 ; LA32-NEXT: jirl $zero, $ra, 0
 ;
 ; LA64-LABEL: test_call_external:
 ; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
 ; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
 ; LA64-NEXT: bl external_function
 ; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
 ; LA64-NEXT: jirl $zero, $ra, 0
   %1 = call i32 @external_function(i32 %a)
   ret i32 %1
@@ -41,16 +42,20 @@
 define i32 @test_call_defined(i32 %a) nounwind {
 ; LA32-LABEL: test_call_defined:
 ; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
 ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
 ; LA32-NEXT: bl defined_function
 ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
 ; LA32-NEXT: jirl $zero, $ra, 0
 ;
 ; LA64-LABEL: test_call_defined:
 ; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
 ; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
 ; LA64-NEXT: bl defined_function
 ; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
 ; LA64-NEXT: jirl $zero, $ra, 0
   %1 = call i32 @defined_function(i32 %a) nounwind
   ret i32 %1
@@ -59,20 +64,24 @@
 define i32 @test_call_indirect(i32 (i32)* %a, i32 %b) nounwind {
 ; LA32-LABEL: test_call_indirect:
 ; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
 ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
 ; LA32-NEXT: move $a2, $a0
 ; LA32-NEXT: move $a0, $a1
 ; LA32-NEXT: jirl $ra, $a2, 0
 ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
 ; LA32-NEXT: jirl $zero, $ra, 0
 ;
 ; LA64-LABEL: test_call_indirect:
 ; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
 ; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
 ; LA64-NEXT: move $a2, $a0
 ; LA64-NEXT: move $a0, $a1
 ; LA64-NEXT: jirl $ra, $a2, 0
 ; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
 ; LA64-NEXT: jirl $zero, $ra, 0
   %1 = call i32 %a(i32 %b)
   ret i32 %1