diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp --- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp @@ -118,12 +118,32 @@ MFI.setStackSize(FrameSize); } +static uint64_t estimateFunctionSizeInBytes(const LoongArchInstrInfo *TII, + const MachineFunction &MF) { + uint64_t FuncSize = 0; + for (auto &MBB : MF) + for (auto &MI : MBB) + FuncSize += TII->getInstSizeInBytes(MI); + return FuncSize; +} + void LoongArchFrameLowering::processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS) const { const LoongArchRegisterInfo *RI = STI.getRegisterInfo(); const TargetRegisterClass &RC = LoongArch::GPRRegClass; + const LoongArchInstrInfo *TII = STI.getInstrInfo(); + LoongArchMachineFunctionInfo *LAFI = + MF.getInfo<LoongArchMachineFunctionInfo>(); MachineFrameInfo &MFI = MF.getFrameInfo(); + // Far branches beyond 27-bit offset require a spill slot for scratch register. + if (!isInt<27>(estimateFunctionSizeInBytes(TII, MF))) { + int FI = MFI.CreateStackObject(RI->getSpillSize(RC), RI->getSpillAlign(RC), + false); + RS->addScavengingFrameIndex(FI); + if (LAFI->getBranchRelaxationSpillFrameIndex() == -1) + LAFI->setBranchRelaxationSpillFrameIndex(FI); + } // estimateStackSize has been observed to under-estimate the final stack // size, so give ourselves wiggle-room by checking for stack size // representable an 11-bit signed field rather than 12-bits. 
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -371,6 +371,9 @@ MachineFunction *MF = MBB.getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + LoongArchMachineFunctionInfo *LAFI = + MF->getInfo<LoongArchMachineFunctionInfo>(); if (!isInt<32>(BrOffset)) report_fatal_error( @@ -379,26 +382,45 @@ Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass); auto II = MBB.end(); - MachineInstr &MI = + MachineInstr &PCALAU12I = *BuildMI(MBB, II, DL, get(LoongArch::PCALAU12I), ScratchReg) .addMBB(&DestBB, LoongArchII::MO_PCREL_HI); - BuildMI(MBB, II, DL, - get(STI.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W), - ScratchReg) - .addReg(ScratchReg) - .addMBB(&DestBB, LoongArchII::MO_PCREL_LO); + MachineInstr &ADDI = + *BuildMI(MBB, II, DL, + get(STI.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W), + ScratchReg) + .addReg(ScratchReg) + .addMBB(&DestBB, LoongArchII::MO_PCREL_LO); BuildMI(MBB, II, DL, get(LoongArch::PseudoBRIND)) .addReg(ScratchReg, RegState::Kill) .addImm(0); RS->enterBasicBlockEnd(MBB); - Register Scav = RS->scavengeRegisterBackwards(LoongArch::GPRRegClass, - MI.getIterator(), false, 0); - // TODO: When there is no scavenged register, it needs to specify a register. - assert(Scav != LoongArch::NoRegister && "No register is scavenged!"); + Register Scav = RS->scavengeRegisterBackwards( + LoongArch::GPRRegClass, PCALAU12I.getIterator(), /*RestoreAfter=*/false, + /*SPAdj=*/0, /*AllowSpill=*/false); + if (Scav != LoongArch::NoRegister) + RS->setRegUsed(Scav); + else { + // When there is no scavenged register, it needs to specify a register. + // Specify t8 register because it won't be used too often. 
+ Scav = LoongArch::R20; + int FrameIndex = LAFI->getBranchRelaxationSpillFrameIndex(); + if (FrameIndex == -1) + report_fatal_error("The function size is incorrectly estimated."); + storeRegToStackSlot(MBB, PCALAU12I, Scav, /*IsKill=*/true, FrameIndex, + &LoongArch::GPRRegClass, TRI); + TRI->eliminateFrameIndex(std::prev(PCALAU12I.getIterator()), + /*SpAdj=*/0, /*FIOperandNum=*/1); + PCALAU12I.getOperand(1).setMBB(&RestoreBB); + ADDI.getOperand(2).setMBB(&RestoreBB); + loadRegFromStackSlot(RestoreBB, RestoreBB.end(), Scav, FrameIndex, + &LoongArch::GPRRegClass, TRI); + TRI->eliminateFrameIndex(RestoreBB.back(), + /*SpAdj=*/0, /*FIOperandNum=*/1); + } MRI.replaceRegWith(ScratchReg, Scav); MRI.clearVirtRegs(); - RS->setRegUsed(Scav); } static unsigned getOppositeBranchOpc(unsigned Opc) { diff --git a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h --- a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h @@ -32,6 +32,10 @@ /// Size of stack frame to save callee saved registers unsigned CalleeSavedStackSize = 0; + /// FrameIndex of the spill slot when there is no scavenged register in + /// insertIndirectBranch. 
+ int BranchRelaxationSpillFrameIndex = -1; + public: LoongArchMachineFunctionInfo(const MachineFunction &MF) {} @@ -50,6 +54,13 @@ unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; } void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; } + + int getBranchRelaxationSpillFrameIndex() { + return BranchRelaxationSpillFrameIndex; + } + void setBranchRelaxationSpillFrameIndex(int Index) { + BranchRelaxationSpillFrameIndex = Index; + } }; } // end namespace llvm diff --git a/llvm/test/CodeGen/LoongArch/branch-relaxation-spill-32.ll b/llvm/test/CodeGen/LoongArch/branch-relaxation-spill-32.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/branch-relaxation-spill-32.ll @@ -0,0 +1,313 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --filetype=obj --verify-machineinstrs < %s \ +; RUN: -o /dev/null 2>&1 +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s | FileCheck %s + +define void @relax_b28_spill() { +; CHECK-LABEL: relax_b28_spill: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $sp, $sp, -48 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill +; CHECK-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s5, $sp, 16 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s6, $sp, 12 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s7, $sp, 8 # 4-byte Folded Spill +; CHECK-NEXT: st.w $s8, $sp, 4 # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -4 +; CHECK-NEXT: .cfi_offset 22, -8 +; CHECK-NEXT: .cfi_offset 23, -12 +; CHECK-NEXT: .cfi_offset 24, -16 +; CHECK-NEXT: .cfi_offset 25, -20 +; CHECK-NEXT: .cfi_offset 26, -24 +; 
CHECK-NEXT: .cfi_offset 27, -28 +; CHECK-NEXT: .cfi_offset 28, -32 +; CHECK-NEXT: .cfi_offset 29, -36 +; CHECK-NEXT: .cfi_offset 30, -40 +; CHECK-NEXT: .cfi_offset 31, -44 +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $zero, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $ra, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $tp, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a0, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a1, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a2, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a3, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a4, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a5, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a6, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $a7, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t0, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t1, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t2, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t3, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t4, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t5, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t6, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t7, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $t8, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $fp, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s0, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s1, $zero, 1 +; CHECK-NEXT: #NO_APP +; 
CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s2, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s3, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s4, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s5, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s6, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s7, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.w $s8, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: beq $s7, $s8, .LBB0_1 +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: st.w $t8, $sp, 0 +; CHECK-NEXT: pcalau12i $t8, %pc_hi20(.LBB0_5) +; CHECK-NEXT: addi.w $t8, $t8, %pc_lo12(.LBB0_5) +; CHECK-NEXT: jr $t8 +; CHECK-NEXT: .LBB0_1: # %iftrue +; CHECK-NEXT: #APP +; CHECK-NEXT: .space 536870912 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_5: # %iftrue +; CHECK-NEXT: ld.w $t8, $sp, 0 +; CHECK-NEXT: # %bb.2: # %iffalse +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $zero +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $ra +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $tp +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t2 +; CHECK-NEXT: 
#NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t8 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $fp +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s8 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: .LBB0_3: # %iftrue +; CHECK-NEXT: ld.w $s8, $sp, 4 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s7, $sp, 8 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload +; CHECK-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload +; CHECK-NEXT: addi.w $sp, $sp, 48 +; CHECK-NEXT: ret + %zero = call i32 asm sideeffect "addi.w $$zero, $$zero, 1", "={r0}"() + %ra = call i32 asm sideeffect "addi.w $$ra, $$zero, 1", 
"={r1}"() + %tp = call i32 asm sideeffect "addi.w $$tp, $$zero, 1", "={r2}"() + %a0 = call i32 asm sideeffect "addi.w $$a0, $$zero, 1", "={r4}"() + %a1 = call i32 asm sideeffect "addi.w $$a1, $$zero, 1", "={r5}"() + %a2 = call i32 asm sideeffect "addi.w $$a2, $$zero, 1", "={r6}"() + %a3 = call i32 asm sideeffect "addi.w $$a3, $$zero, 1", "={r7}"() + %a4 = call i32 asm sideeffect "addi.w $$a4, $$zero, 1", "={r8}"() + %a5 = call i32 asm sideeffect "addi.w $$a5, $$zero, 1", "={r9}"() + %a6 = call i32 asm sideeffect "addi.w $$a6, $$zero, 1", "={r10}"() + %a7 = call i32 asm sideeffect "addi.w $$a7, $$zero, 1", "={r11}"() + %t0 = call i32 asm sideeffect "addi.w $$t0, $$zero, 1", "={r12}"() + %t1 = call i32 asm sideeffect "addi.w $$t1, $$zero, 1", "={r13}"() + %t2 = call i32 asm sideeffect "addi.w $$t2, $$zero, 1", "={r14}"() + %t3 = call i32 asm sideeffect "addi.w $$t3, $$zero, 1", "={r15}"() + %t4 = call i32 asm sideeffect "addi.w $$t4, $$zero, 1", "={r16}"() + %t5 = call i32 asm sideeffect "addi.w $$t5, $$zero, 1", "={r17}"() + %t6 = call i32 asm sideeffect "addi.w $$t6, $$zero, 1", "={r18}"() + %t7 = call i32 asm sideeffect "addi.w $$t7, $$zero, 1", "={r19}"() + %t8 = call i32 asm sideeffect "addi.w $$t8, $$zero, 1", "={r20}"() + ;; r21 Reserved (Non-allocatable) + %s9 = call i32 asm sideeffect "addi.w $$s9, $$zero, 1", "={r22}"() + %s0 = call i32 asm sideeffect "addi.w $$s0, $$zero, 1", "={r23}"() + %s1 = call i32 asm sideeffect "addi.w $$s1, $$zero, 1", "={r24}"() + %s2 = call i32 asm sideeffect "addi.w $$s2, $$zero, 1", "={r25}"() + %s3 = call i32 asm sideeffect "addi.w $$s3, $$zero, 1", "={r26}"() + %s4 = call i32 asm sideeffect "addi.w $$s4, $$zero, 1", "={r27}"() + %s5 = call i32 asm sideeffect "addi.w $$s5, $$zero, 1", "={r28}"() + %s6 = call i32 asm sideeffect "addi.w $$s6, $$zero, 1", "={r29}"() + %s7 = call i32 asm sideeffect "addi.w $$s7, $$zero, 1", "={r30}"() + %s8 = call i32 asm sideeffect "addi.w $$s8, $$zero, 1", "={r31}"() + + %cmp = icmp eq i32 %s7, 
%s8 + br i1 %cmp, label %iftrue, label %iffalse + +iftrue: + call void asm sideeffect ".space 536870912", ""() + ret void + +iffalse: + call void asm sideeffect "# reg use $0", "{r0}"(i32 %zero) + call void asm sideeffect "# reg use $0", "{r1}"(i32 %ra) + call void asm sideeffect "# reg use $0", "{r2}"(i32 %tp) + call void asm sideeffect "# reg use $0", "{r4}"(i32 %a0) + call void asm sideeffect "# reg use $0", "{r5}"(i32 %a1) + call void asm sideeffect "# reg use $0", "{r6}"(i32 %a2) + call void asm sideeffect "# reg use $0", "{r7}"(i32 %a3) + call void asm sideeffect "# reg use $0", "{r8}"(i32 %a4) + call void asm sideeffect "# reg use $0", "{r9}"(i32 %a5) + call void asm sideeffect "# reg use $0", "{r10}"(i32 %a6) + call void asm sideeffect "# reg use $0", "{r11}"(i32 %a7) + call void asm sideeffect "# reg use $0", "{r12}"(i32 %t0) + call void asm sideeffect "# reg use $0", "{r13}"(i32 %t1) + call void asm sideeffect "# reg use $0", "{r14}"(i32 %t2) + call void asm sideeffect "# reg use $0", "{r15}"(i32 %t3) + call void asm sideeffect "# reg use $0", "{r16}"(i32 %t4) + call void asm sideeffect "# reg use $0", "{r17}"(i32 %t5) + call void asm sideeffect "# reg use $0", "{r18}"(i32 %t6) + call void asm sideeffect "# reg use $0", "{r19}"(i32 %t7) + call void asm sideeffect "# reg use $0", "{r20}"(i32 %t8) + ;; r21 Reserved (Non-allocatable) + call void asm sideeffect "# reg use $0", "{r22}"(i32 %s9) + call void asm sideeffect "# reg use $0", "{r23}"(i32 %s0) + call void asm sideeffect "# reg use $0", "{r24}"(i32 %s1) + call void asm sideeffect "# reg use $0", "{r25}"(i32 %s2) + call void asm sideeffect "# reg use $0", "{r26}"(i32 %s3) + call void asm sideeffect "# reg use $0", "{r27}"(i32 %s4) + call void asm sideeffect "# reg use $0", "{r28}"(i32 %s5) + call void asm sideeffect "# reg use $0", "{r29}"(i32 %s6) + call void asm sideeffect "# reg use $0", "{r30}"(i32 %s7) + call void asm sideeffect "# reg use $0", "{r31}"(i32 %s8) + ret void +} diff --git 
a/llvm/test/CodeGen/LoongArch/branch-relaxation-spill-64.ll b/llvm/test/CodeGen/LoongArch/branch-relaxation-spill-64.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/branch-relaxation-spill-64.ll @@ -0,0 +1,313 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --filetype=obj --verify-machineinstrs < %s \ +; RUN: -o /dev/null 2>&1 +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s | FileCheck %s + +define void @relax_b28_spill() { +; CHECK-LABEL: relax_b28_spill: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -96 +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s0, $sp, 72 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s1, $sp, 64 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s2, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s3, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s4, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s5, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s6, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s7, $sp, 16 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s8, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 22, -16 +; CHECK-NEXT: .cfi_offset 23, -24 +; CHECK-NEXT: .cfi_offset 24, -32 +; CHECK-NEXT: .cfi_offset 25, -40 +; CHECK-NEXT: .cfi_offset 26, -48 +; CHECK-NEXT: .cfi_offset 27, -56 +; CHECK-NEXT: .cfi_offset 28, -64 +; CHECK-NEXT: .cfi_offset 29, -72 +; CHECK-NEXT: .cfi_offset 30, -80 +; CHECK-NEXT: .cfi_offset 31, -88 +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $zero, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $ra, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $tp, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a0, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: 
addi.d $a1, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a2, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a3, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a4, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a5, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a6, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $a7, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t0, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t1, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t2, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t3, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t4, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t5, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t6, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t7, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $t8, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $fp, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s0, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s1, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s2, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s3, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s4, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s5, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s6, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s7, $zero, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: addi.d $s8, $zero, 1 +; 
CHECK-NEXT: #NO_APP +; CHECK-NEXT: beq $s7, $s8, .LBB0_1 +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: st.d $t8, $sp, 0 +; CHECK-NEXT: pcalau12i $t8, %pc_hi20(.LBB0_5) +; CHECK-NEXT: addi.d $t8, $t8, %pc_lo12(.LBB0_5) +; CHECK-NEXT: jr $t8 +; CHECK-NEXT: .LBB0_1: # %iftrue +; CHECK-NEXT: #APP +; CHECK-NEXT: .space 536870912 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_5: # %iftrue +; CHECK-NEXT: ld.d $t8, $sp, 0 +; CHECK-NEXT: # %bb.2: # %iffalse +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $zero +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $ra +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $tp +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $a7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $t8 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $fp +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: 
#APP +; CHECK-NEXT: # reg use $s0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: # reg use $s8 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: .LBB0_3: # %iftrue +; CHECK-NEXT: ld.d $s8, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s7, $sp, 16 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s6, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s5, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s4, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s3, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s2, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s1, $sp, 64 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $s0, $sp, 72 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: ret + %zero = call i64 asm sideeffect "addi.d $$zero, $$zero, 1", "={r0}"() + %ra = call i64 asm sideeffect "addi.d $$ra, $$zero, 1", "={r1}"() + %tp = call i64 asm sideeffect "addi.d $$tp, $$zero, 1", "={r2}"() + %a0 = call i64 asm sideeffect "addi.d $$a0, $$zero, 1", "={r4}"() + %a1 = call i64 asm sideeffect "addi.d $$a1, $$zero, 1", "={r5}"() + %a2 = call i64 asm sideeffect "addi.d $$a2, $$zero, 1", "={r6}"() + %a3 = call i64 asm sideeffect "addi.d $$a3, $$zero, 1", "={r7}"() + %a4 = call i64 asm sideeffect "addi.d $$a4, $$zero, 1", "={r8}"() + %a5 = call i64 asm sideeffect "addi.d $$a5, $$zero, 1", "={r9}"() + %a6 = call i64 asm sideeffect "addi.d $$a6, 
$$zero, 1", "={r10}"() + %a7 = call i64 asm sideeffect "addi.d $$a7, $$zero, 1", "={r11}"() + %t0 = call i64 asm sideeffect "addi.d $$t0, $$zero, 1", "={r12}"() + %t1 = call i64 asm sideeffect "addi.d $$t1, $$zero, 1", "={r13}"() + %t2 = call i64 asm sideeffect "addi.d $$t2, $$zero, 1", "={r14}"() + %t3 = call i64 asm sideeffect "addi.d $$t3, $$zero, 1", "={r15}"() + %t4 = call i64 asm sideeffect "addi.d $$t4, $$zero, 1", "={r16}"() + %t5 = call i64 asm sideeffect "addi.d $$t5, $$zero, 1", "={r17}"() + %t6 = call i64 asm sideeffect "addi.d $$t6, $$zero, 1", "={r18}"() + %t7 = call i64 asm sideeffect "addi.d $$t7, $$zero, 1", "={r19}"() + %t8 = call i64 asm sideeffect "addi.d $$t8, $$zero, 1", "={r20}"() + ;; r21 Reserved (Non-allocatable) + %s9 = call i64 asm sideeffect "addi.d $$s9, $$zero, 1", "={r22}"() + %s0 = call i64 asm sideeffect "addi.d $$s0, $$zero, 1", "={r23}"() + %s1 = call i64 asm sideeffect "addi.d $$s1, $$zero, 1", "={r24}"() + %s2 = call i64 asm sideeffect "addi.d $$s2, $$zero, 1", "={r25}"() + %s3 = call i64 asm sideeffect "addi.d $$s3, $$zero, 1", "={r26}"() + %s4 = call i64 asm sideeffect "addi.d $$s4, $$zero, 1", "={r27}"() + %s5 = call i64 asm sideeffect "addi.d $$s5, $$zero, 1", "={r28}"() + %s6 = call i64 asm sideeffect "addi.d $$s6, $$zero, 1", "={r29}"() + %s7 = call i64 asm sideeffect "addi.d $$s7, $$zero, 1", "={r30}"() + %s8 = call i64 asm sideeffect "addi.d $$s8, $$zero, 1", "={r31}"() + + %cmp = icmp eq i64 %s7, %s8 + br i1 %cmp, label %iftrue, label %iffalse + +iftrue: + call void asm sideeffect ".space 536870912", ""() + ret void + +iffalse: + call void asm sideeffect "# reg use $0", "{r0}"(i64 %zero) + call void asm sideeffect "# reg use $0", "{r1}"(i64 %ra) + call void asm sideeffect "# reg use $0", "{r2}"(i64 %tp) + call void asm sideeffect "# reg use $0", "{r4}"(i64 %a0) + call void asm sideeffect "# reg use $0", "{r5}"(i64 %a1) + call void asm sideeffect "# reg use $0", "{r6}"(i64 %a2) + call void asm sideeffect "# reg use $0", 
"{r7}"(i64 %a3) + call void asm sideeffect "# reg use $0", "{r8}"(i64 %a4) + call void asm sideeffect "# reg use $0", "{r9}"(i64 %a5) + call void asm sideeffect "# reg use $0", "{r10}"(i64 %a6) + call void asm sideeffect "# reg use $0", "{r11}"(i64 %a7) + call void asm sideeffect "# reg use $0", "{r12}"(i64 %t0) + call void asm sideeffect "# reg use $0", "{r13}"(i64 %t1) + call void asm sideeffect "# reg use $0", "{r14}"(i64 %t2) + call void asm sideeffect "# reg use $0", "{r15}"(i64 %t3) + call void asm sideeffect "# reg use $0", "{r16}"(i64 %t4) + call void asm sideeffect "# reg use $0", "{r17}"(i64 %t5) + call void asm sideeffect "# reg use $0", "{r18}"(i64 %t6) + call void asm sideeffect "# reg use $0", "{r19}"(i64 %t7) + call void asm sideeffect "# reg use $0", "{r20}"(i64 %t8) + ;; r21 Reserved (Non-allocatable) + call void asm sideeffect "# reg use $0", "{r22}"(i64 %s9) + call void asm sideeffect "# reg use $0", "{r23}"(i64 %s0) + call void asm sideeffect "# reg use $0", "{r24}"(i64 %s1) + call void asm sideeffect "# reg use $0", "{r25}"(i64 %s2) + call void asm sideeffect "# reg use $0", "{r26}"(i64 %s3) + call void asm sideeffect "# reg use $0", "{r27}"(i64 %s4) + call void asm sideeffect "# reg use $0", "{r28}"(i64 %s5) + call void asm sideeffect "# reg use $0", "{r29}"(i64 %s6) + call void asm sideeffect "# reg use $0", "{r30}"(i64 %s7) + call void asm sideeffect "# reg use $0", "{r31}"(i64 %s8) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/branch-relaxation.ll b/llvm/test/CodeGen/LoongArch/branch-relaxation.ll --- a/llvm/test/CodeGen/LoongArch/branch-relaxation.ll +++ b/llvm/test/CodeGen/LoongArch/branch-relaxation.ll @@ -88,6 +88,8 @@ define i32 @relax_b28(i1 %a) { ; LA32-LABEL: relax_b28: ; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 ; LA32-NEXT: andi $a0, $a0, 1 ; LA32-NEXT: bnez $a0, .LBB2_1 ; LA32-NEXT: # %bb.3: @@ -99,13 +101,17 @@ ; LA32-NEXT: .space 536870912 ; LA32-NEXT: #NO_APP ; LA32-NEXT: 
ori $a0, $zero, 1 +; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret ; LA32-NEXT: .LBB2_2: # %iffalse ; LA32-NEXT: move $a0, $zero +; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret ; ; LA64-LABEL: relax_b28: ; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 ; LA64-NEXT: andi $a0, $a0, 1 ; LA64-NEXT: bnez $a0, .LBB2_1 ; LA64-NEXT: # %bb.3: @@ -117,9 +123,11 @@ ; LA64-NEXT: .space 536870912 ; LA64-NEXT: #NO_APP ; LA64-NEXT: ori $a0, $zero, 1 +; LA64-NEXT: addi.d $sp, $sp, 16 ; LA64-NEXT: ret ; LA64-NEXT: .LBB2_2: # %iffalse ; LA64-NEXT: move $a0, $zero +; LA64-NEXT: addi.d $sp, $sp, 16 ; LA64-NEXT: ret br i1 %a, label %iftrue, label %iffalse