diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h --- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h @@ -34,6 +34,9 @@ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const override; + bool hasReservedCallFrame(const MachineFunction &MF) const override; MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp --- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp @@ -118,6 +118,26 @@ MFI.setStackSize(FrameSize); } +void LoongArchFrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + const LoongArchRegisterInfo *RI = STI.getRegisterInfo(); + const TargetRegisterClass &RC = LoongArch::GPRRegClass; + MachineFrameInfo &MFI = MF.getFrameInfo(); + + // estimateStackSize has been observed to under-estimate the final stack + // size, so give ourselves wiggle-room by checking for stack size + // representable in an 11-bit signed field rather than 12 bits. + if (isInt<11>(MFI.estimateStackSize(MF))) + return; + + // Create an emergency spill slot. + int FI = + MFI.CreateStackObject(RI->getSpillSize(RC), RI->getSpillAlign(RC), false); + RS->addScavengingFrameIndex(FI); + LLVM_DEBUG(dbgs() << "Allocated FI(" << FI + << ") as the emergency spill slot.\n"); +} + void LoongArchFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -144,7 +164,22 @@ return; // Adjust stack. 
- adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup); + unsigned Addi = STI.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W; + if (hasFP(MF)) { + adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, + MachineInstr::FrameSetup); + } else { + // FIXME: RegScavenger will place the spill instruction before the prologue + // if a VReg is created in the prologue. This will pollute the caller's + // stack data. Therefore, until there is a better way, we just use the + // `addi.w/d` instruction for stack adjustment to ensure that VReg will not + // be created. + for (int Val = StackSize; Val > 0; Val -= 2048) + BuildMI(MBB, MBBI, DL, TII->get(Addi), SPReg) + .addReg(SPReg) + .addImm(Val < 2048 ? -Val : -2048) + .setMIFlag(MachineInstr::FrameSetup); + } // Emit ".cfi_def_cfa_offset StackSize". unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize)); diff --git a/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll b/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 -O0 < %s | FileCheck %s + +@var = external global i32 + +define void @func() { +; CHECK-LABEL: func: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -2048 +; CHECK-NEXT: addi.d $sp, $sp, -2048 +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 4112 +; CHECK-NEXT: pcalau12i $a0, %got_hi20(var) +; CHECK-NEXT: ld.d $a1, $a0, %got_lo12(var) +; CHECK-NEXT: ld.w $t8, $a1, 0 +; CHECK-NEXT: ld.w $t7, $a1, 0 +; CHECK-NEXT: ld.w $t6, $a1, 0 +; CHECK-NEXT: ld.w $t5, $a1, 0 +; CHECK-NEXT: ld.w $t4, $a1, 0 +; CHECK-NEXT: ld.w $t3, $a1, 0 +; CHECK-NEXT: ld.w $t2, $a1, 0 +; CHECK-NEXT: ld.w $t1, $a1, 0 +; CHECK-NEXT: ld.w $t0, $a1, 0 +; CHECK-NEXT: ld.w $a7, $a1, 0 +; CHECK-NEXT: ld.w $a6, $a1, 0 +; CHECK-NEXT: ld.w $a5, $a1, 
0 +; CHECK-NEXT: ld.w $a4, $a1, 0 +; CHECK-NEXT: ld.w $a3, $a1, 0 +; CHECK-NEXT: ld.w $a2, $a1, 0 +; CHECK-NEXT: ld.w $a0, $a1, 0 +; CHECK-NEXT: st.d $fp, $sp, 0 +; CHECK-NEXT: lu12i.w $fp, 1 +; CHECK-NEXT: ori $fp, $fp, 12 +; CHECK-NEXT: add.d $fp, $sp, $fp +; CHECK-NEXT: st.w $t8, $fp, 0 +; CHECK-NEXT: ld.d $fp, $sp, 0 +; CHECK-NEXT: st.w $t8, $a1, 0 +; CHECK-NEXT: st.w $t7, $a1, 0 +; CHECK-NEXT: st.w $t6, $a1, 0 +; CHECK-NEXT: st.w $t5, $a1, 0 +; CHECK-NEXT: st.w $t4, $a1, 0 +; CHECK-NEXT: st.w $t3, $a1, 0 +; CHECK-NEXT: st.w $t2, $a1, 0 +; CHECK-NEXT: st.w $t1, $a1, 0 +; CHECK-NEXT: st.w $t0, $a1, 0 +; CHECK-NEXT: st.w $a7, $a1, 0 +; CHECK-NEXT: st.w $a6, $a1, 0 +; CHECK-NEXT: st.w $a5, $a1, 0 +; CHECK-NEXT: st.w $a4, $a1, 0 +; CHECK-NEXT: st.w $a3, $a1, 0 +; CHECK-NEXT: st.w $a2, $a1, 0 +; CHECK-NEXT: st.w $a0, $a1, 0 +; CHECK-NEXT: lu12i.w $a0, 1 +; CHECK-NEXT: ori $a0, $a0, 16 +; CHECK-NEXT: add.d $sp, $sp, $a0 +; CHECK-NEXT: ret + %space = alloca i32, align 4 + %stackspace = alloca[1024 x i32], align 4 + + ;; Load values to increase register pressure. + %v0 = load volatile i32, ptr @var + %v1 = load volatile i32, ptr @var + %v2 = load volatile i32, ptr @var + %v3 = load volatile i32, ptr @var + %v4 = load volatile i32, ptr @var + %v5 = load volatile i32, ptr @var + %v6 = load volatile i32, ptr @var + %v7 = load volatile i32, ptr @var + %v8 = load volatile i32, ptr @var + %v9 = load volatile i32, ptr @var + %v10 = load volatile i32, ptr @var + %v11 = load volatile i32, ptr @var + %v12 = load volatile i32, ptr @var + %v13 = load volatile i32, ptr @var + %v14 = load volatile i32, ptr @var + %v15 = load volatile i32, ptr @var + + ;; Computing a stack-relative value needs an additional register. + ;; We should get an emergency spill/reload for this. + store volatile i32 %v0, ptr %space + + ;; Store values so they are used. 
+ store volatile i32 %v0, ptr @var + store volatile i32 %v1, ptr @var + store volatile i32 %v2, ptr @var + store volatile i32 %v3, ptr @var + store volatile i32 %v4, ptr @var + store volatile i32 %v5, ptr @var + store volatile i32 %v6, ptr @var + store volatile i32 %v7, ptr @var + store volatile i32 %v8, ptr @var + store volatile i32 %v9, ptr @var + store volatile i32 %v10, ptr @var + store volatile i32 %v11, ptr @var + store volatile i32 %v12, ptr @var + store volatile i32 %v13, ptr @var + store volatile i32 %v14, ptr @var + store volatile i32 %v15, ptr @var + + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/frame.ll b/llvm/test/CodeGen/LoongArch/frame.ll --- a/llvm/test/CodeGen/LoongArch/frame.ll +++ b/llvm/test/CodeGen/LoongArch/frame.ll @@ -27,6 +27,7 @@ ret i32 0 } +;; Note: will create an emergency spill slot, if (!isInt<11>(StackSize)). ;; Should involve only one SP-adjusting addi per adjustment. define void @test_large_frame_size_2032() { ; CHECK-LABEL: test_large_frame_size_2032: @@ -35,7 +36,7 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 2032 ; CHECK-NEXT: addi.d $sp, $sp, 2032 ; CHECK-NEXT: ret - %1 = alloca i8, i32 2032 + %1 = alloca i8, i32 2016 ; + 16(emergency slot) = 2032 ret void } @@ -49,7 +50,7 @@ ; CHECK-NEXT: addi.d $sp, $sp, 2032 ; CHECK-NEXT: addi.d $sp, $sp, 16 ; CHECK-NEXT: ret - %1 = alloca i8, i32 2048 + %1 = alloca i8, i32 2032 ; + 16(emergency slot) = 2048 ret void } @@ -63,20 +64,46 @@ ; CHECK-NEXT: addi.d $sp, $sp, 2032 ; CHECK-NEXT: addi.d $sp, $sp, 32 ; CHECK-NEXT: ret - %1 = alloca i8, i32 2064 + %1 = alloca i8, i32 2048 ; + 16(emergency slot) = 2064 ret void } +;; NOTE: Due to the problem with the emergency spill slot, the scratch register +;; will not be used when the fp is eliminated. To make this test valid, add the +;; attribute "frame-pointer=all". + ;; SP should be adjusted with help of a scratch register. 
-define void @test_large_frame_size_1234576() { +define void @test_large_frame_size_1234576() "frame-pointer"="all" { ; CHECK-LABEL: test_large_frame_size_1234576: ; CHECK: # %bb.0: ; CHECK-NEXT: lu12i.w $a0, 301 -; CHECK-NEXT: ori $a0, $a0, 1680 +; CHECK-NEXT: ori $a0, $a0, 1696 ; CHECK-NEXT: sub.d $sp, $sp, $a0 -; CHECK-NEXT: .cfi_def_cfa_offset 1234576 +; CHECK-NEXT: .cfi_def_cfa_offset 1234592 +; CHECK-NEXT: lu12i.w $a0, 301 +; CHECK-NEXT: ori $a0, $a0, 1688 +; CHECK-NEXT: add.d $a0, $sp, $a0 +; CHECK-NEXT: st.d $ra, $a0, 0 # 8-byte Folded Spill +; CHECK-NEXT: lu12i.w $a0, 301 +; CHECK-NEXT: ori $a0, $a0, 1680 +; CHECK-NEXT: add.d $a0, $sp, $a0 +; CHECK-NEXT: st.d $fp, $a0, 0 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 22, -16 +; CHECK-NEXT: lu12i.w $a0, 301 +; CHECK-NEXT: ori $a0, $a0, 1696 +; CHECK-NEXT: add.d $fp, $sp, $a0 +; CHECK-NEXT: .cfi_def_cfa 22, 0 ; CHECK-NEXT: lu12i.w $a0, 301 ; CHECK-NEXT: ori $a0, $a0, 1680 +; CHECK-NEXT: add.d $a0, $sp, $a0 +; CHECK-NEXT: ld.d $fp, $a0, 0 # 8-byte Folded Reload +; CHECK-NEXT: lu12i.w $a0, 301 +; CHECK-NEXT: ori $a0, $a0, 1688 +; CHECK-NEXT: add.d $a0, $sp, $a0 +; CHECK-NEXT: ld.d $ra, $a0, 0 # 8-byte Folded Reload +; CHECK-NEXT: lu12i.w $a0, 301 +; CHECK-NEXT: ori $a0, $a0, 1696 ; CHECK-NEXT: add.d $sp, $sp, $a0 ; CHECK-NEXT: ret %1 = alloca i8, i32 1234567