diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -996,10 +996,50 @@
   return MaxScavSlotsNum;
 }
 
+static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
+                                            const RISCVInstrInfo &TII) {
+  unsigned FnSize = 0;
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
+      // Far branches over a 20-bit offset will be relaxed by the branch
+      // relaxation pass. In the worst case, conditional branches are relaxed
+      // into the following instruction sequence. Unconditional branches are
+      // relaxed in the same way, with the exception that there is no first
+      // branch instruction.
+      //
+      //        foo
+      //        bne t5, t6, .rev_cond # `TII->getInstSizeInBytes(MI)` bytes
+      //        sd s11, 0(sp)         # 4 bytes, or 2 bytes in RVC
+      //        jump .restore, s11    # 8 bytes
+      // .rev_cond
+      //        bar
+      //        j .dest               # 4 bytes, or 2 bytes in RVC
+      // .restore:
+      //        ld s11, 0(sp)         # 4 bytes, or 2 bytes in RVC
+      // .dest:
+      //        baz
+
+      if (MI.isConditionalBranch())
+        FnSize += TII.getInstSizeInBytes(MI);
+      if (MI.isConditionalBranch() || MI.isUnconditionalBranch()) {
+        if (MF.getSubtarget<RISCVSubtarget>().hasStdExtC())
+          FnSize += 2 + 8 + 2 + 2;
+        else
+          FnSize += 4 + 8 + 4 + 4;
+        continue;
+      }
+
+      FnSize += TII.getInstSizeInBytes(MI);
+    }
+  }
+  return FnSize;
+}
+
 void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
     MachineFunction &MF, RegScavenger *RS) const {
   const RISCVRegisterInfo *RegInfo =
       MF.getSubtarget<RISCVSubtarget>().getRegisterInfo();
+  const RISCVInstrInfo *TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   const TargetRegisterClass *RC = &RISCV::GPRRegClass;
   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
@@ -1019,9 +1059,6 @@
   // estimateStackSize has been observed to under-estimate the final stack
   // size, so give ourselves wiggle-room by checking for a stack size
   // representable in an 11-bit signed field rather than 12 bits.
-  // FIXME: It may be possible to craft a function with a small stack that
-  // still needs an emergency spill slot for branch relaxation. This case
-  // would currently be missed.
   // RVV loads & stores have no capacity to hold the immediate address offsets
   // so we must always reserve an emergency spill slot if the MachineFunction
   // contains any RVV spills.
@@ -1029,10 +1066,19 @@
   if (!isInt<11>(MFI.estimateStackSize(MF)))
     ScavSlotsNum = 1;
 
+  // Far branches over a 20-bit offset require a spill slot for the scratch
+  // register.
+  bool IsLargeFunction = !isInt<20>(EstimateFunctionSizeInBytes(MF, *TII));
+  if (IsLargeFunction)
+    ScavSlotsNum = std::max(ScavSlotsNum, 1u);
+
   ScavSlotsNum = std::max(ScavSlotsNum, getScavSlotsNumForRVV(MF));
 
   for (unsigned i = 0; i < ScavSlotsNum; i++) {
-    RS->addScavengingFrameIndex(MFI.CreateStackObject(
-        RegInfo->getSpillSize(*RC), RegInfo->getSpillAlign(*RC), false));
+    int FI = MFI.CreateStackObject(RegInfo->getSpillSize(*RC),
+                                   RegInfo->getSpillAlign(*RC), false);
+    RS->addScavengingFrameIndex(FI);
+
+    if (IsLargeFunction && i == 0)
+      RVFI->setBranchRelaxationScratchFrameIndex(FI);
   }
 
   if (MFI.getCalleeSavedInfo().empty() || RVFI->useSaveRestoreLibCalls(MF)) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -902,9 +902,13 @@
   assert(MBB.empty() &&
          "new block should be inserted for expanding unconditional branch");
   assert(MBB.pred_size() == 1);
+  assert(RestoreBB.empty() &&
+         "restore block should be inserted for restoring clobbered registers");
 
   MachineFunction *MF = MBB.getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
+  RISCVMachineFunctionInfo *RVMFI = MF->getInfo<RISCVMachineFunctionInfo>();
+  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
 
   if (!isInt<32>(BrOffset))
     report_fatal_error(
@@ -915,19 +919,43 @@
   // uses the same workaround).
   Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
   auto II = MBB.end();
-
+  // We may also update the jump target to RestoreBB later.
   MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
                           .addReg(ScratchReg, RegState::Define | RegState::Dead)
                           .addMBB(&DestBB, RISCVII::MO_CALL);
 
   RS->enterBasicBlockEnd(MBB);
-  Register Scav = RS->scavengeRegisterBackwards(RISCV::GPRRegClass,
-                                                MI.getIterator(), false, 0);
-  // TODO: The case when there is no scavenged register needs special handling.
-  assert(Scav != RISCV::NoRegister && "No register is scavenged!");
-  MRI.replaceRegWith(ScratchReg, Scav);
+  Register TmpGPR =
+      RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
+                                    /*RestoreAfter=*/false, /*SpAdj=*/0,
+                                    /*AllowSpill=*/false);
+  if (TmpGPR != RISCV::NoRegister)
+    RS->setRegUsed(TmpGPR);
+  else {
+    // The case when there is no scavenged register needs special handling.
+
+    // Pick s11; the choice is arbitrary since it is spilled and restored.
+    TmpGPR = RISCV::X27;
+
+    int FrameIndex = RVMFI->getBranchRelaxationScratchFrameIndex();
+    if (FrameIndex == -1)
+      report_fatal_error("underestimated function size");
+
+    storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex,
+                        &RISCV::GPRRegClass, TRI);
+    TRI->eliminateFrameIndex(std::prev(MI.getIterator()),
+                             /*SpAdj=*/0, /*FIOperandNum=*/1);
+
+    MI.getOperand(1).setMBB(&RestoreBB);
+
+    loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex,
+                         &RISCV::GPRRegClass, TRI);
+    TRI->eliminateFrameIndex(RestoreBB.back(),
+                             /*SpAdj=*/0, /*FIOperandNum=*/1);
+  }
+
+  MRI.replaceRegWith(ScratchReg, TmpGPR);
   MRI.clearVirtRegs();
-  RS->setRegUsed(Scav);
 }
 
 bool RISCVInstrInfo::reverseBranchCondition(
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -53,6 +53,8 @@
   /// FrameIndex used for transferring values between 64-bit FPRs and a pair
   /// of 32-bit GPRs via the stack.
   int MoveF64FrameIndex = -1;
+  /// FrameIndex of the spill slot for the scratch register in BranchRelaxation.
+  int BranchRelaxationScratchFrameIndex = -1;
   /// Size of any opaque stack adjustment due to save/restore libcalls.
   unsigned LibCallStackSize = 0;
   /// Size of RVV stack.
@@ -85,6 +87,13 @@
     return MoveF64FrameIndex;
   }
 
+  int getBranchRelaxationScratchFrameIndex() const {
+    return BranchRelaxationScratchFrameIndex;
+  }
+  void setBranchRelaxationScratchFrameIndex(int Index) {
+    BranchRelaxationScratchFrameIndex = Index;
+  }
+
   unsigned getLibCallStackSize() const { return LibCallStackSize; }
   void setLibCallStackSize(unsigned Size) { LibCallStackSize = Size; }
diff --git a/llvm/test/CodeGen/RISCV/branch-relaxation-spill-32.ll b/llvm/test/CodeGen/RISCV/branch-relaxation-spill-32.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/branch-relaxation-spill-32.ll
@@ -0,0 +1,318 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s
+; RUN: llc -mtriple=riscv32 -relocation-model=pic -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s
+
+define void @relax_spill() {
+; CHECK-LABEL: relax_spill:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -2032
+; CHECK-NEXT:    .cfi_def_cfa_offset 2032
+; CHECK-NEXT:    sw ra, 2028(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s0, 2024(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s1, 2020(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s2, 2016(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s3, 2012(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s4, 2008(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s5, 2004(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s6, 2000(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s7, 1996(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s8, 1992(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s9, 1988(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s10, 1984(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    sw s11, 1980(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset ra, -4
+; CHECK-NEXT:    .cfi_offset s0, -8
+; CHECK-NEXT:    .cfi_offset s1, -12
+; CHECK-NEXT:    .cfi_offset s2, -16
+; CHECK-NEXT:    .cfi_offset s3, -20
+; CHECK-NEXT:    .cfi_offset s4, -24
+; CHECK-NEXT:    .cfi_offset s5, -28
+; CHECK-NEXT:    .cfi_offset s6, -32
+; CHECK-NEXT:    .cfi_offset s7, -36
+; CHECK-NEXT:    .cfi_offset s8, -40
+; CHECK-NEXT:    .cfi_offset s9, -44
+; CHECK-NEXT:    .cfi_offset s10, -48
+; CHECK-NEXT:    .cfi_offset s11, -52
+; CHECK-NEXT:    addi s0, sp, 2032
+; CHECK-NEXT:    .cfi_def_cfa s0, 0
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    addi a0, a0, -2032
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    srli a0, sp, 12
+; CHECK-NEXT:    slli sp, a0, 12
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li ra, 1
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li t0, 5
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li t1, 6
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li t2, 7
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s0, 8
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s1, 9
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li a0, 10
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li a1, 11
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li a2, 12
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li a3, 13
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li a4, 14
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li a5, 15
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li a6, 16
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li a7, 17
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s2, 18
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s3, 19
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s4, 20
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s5, 21
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s6, 22
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s7, 23
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s8, 24
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s9, 25
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s10, 26
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s11, 27
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li t3, 28
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li t4, 29
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li t5, 30
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li t6, 31
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    beq t5, t6, .LBB0_1
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    sw s11, 0(sp)
+; CHECK-NEXT:    jump .LBB0_4, s11
+; CHECK-NEXT:  .LBB0_1: # %branch_1
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    .zero 1048576
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    j .LBB0_2
+; CHECK-NEXT:  .LBB0_4: # %branch_2
+; CHECK-NEXT:    lw s11, 0(sp)
+; CHECK-NEXT:  .LBB0_2: # %branch_2
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use ra
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use t0
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use t1
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use t2
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s0
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s1
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use a0
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use a1
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use a2
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use a3
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use a4
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use a5
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use a6
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use a7
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s2
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s3
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s4
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s5
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s6
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s7
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s8
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s9
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s10
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s11
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use t3
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use t4
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use t5
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use t6
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    sub sp, s0, a0
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    addi a0, a0, -2032
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s1, 2020(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s2, 2016(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s3, 2012(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s4, 2008(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s5, 2004(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s6, 2000(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s7, 1996(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s8, 1992(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s9, 1988(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s10, 1984(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s11, 1980(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 2032
+; CHECK-NEXT:    ret
+
+  ; If the stack is large and the offset of BranchRelaxationScratchFrameIndex
+  ; is outside the range of a 12-bit signed integer, check that the spill
+  ; slot is adjusted to be close to the stack base register.
+  %stack_obj = alloca i32, align 4096
+
+  %ra = call i32 asm sideeffect "addi ra, x0, 1", "={ra}"()
+  %t0 = call i32 asm sideeffect "addi t0, x0, 5", "={t0}"()
+  %t1 = call i32 asm sideeffect "addi t1, x0, 6", "={t1}"()
+  %t2 = call i32 asm sideeffect "addi t2, x0, 7", "={t2}"()
+  %s0 = call i32 asm sideeffect "addi s0, x0, 8", "={s0}"()
+  %s1 = call i32 asm sideeffect "addi s1, x0, 9", "={s1}"()
+  %a0 = call i32 asm sideeffect "addi a0, x0, 10", "={a0}"()
+  %a1 = call i32 asm sideeffect "addi a1, x0, 11", "={a1}"()
+  %a2 = call i32 asm sideeffect "addi a2, x0, 12", "={a2}"()
+  %a3 = call i32 asm sideeffect "addi a3, x0, 13", "={a3}"()
+  %a4 = call i32 asm sideeffect "addi a4, x0, 14", "={a4}"()
+  %a5 = call i32 asm sideeffect "addi a5, x0, 15", "={a5}"()
+  %a6 = call i32 asm sideeffect "addi a6, x0, 16", "={a6}"()
+  %a7 = call i32 asm sideeffect "addi a7, x0, 17", "={a7}"()
+  %s2 = call i32 asm sideeffect "addi s2, x0, 18", "={s2}"()
+  %s3 = call i32 asm sideeffect "addi s3, x0, 19", "={s3}"()
+  %s4 = call i32 asm sideeffect "addi s4, x0, 20", "={s4}"()
+  %s5 = call i32 asm sideeffect "addi s5, x0, 21", "={s5}"()
+  %s6 = call i32 asm sideeffect "addi s6, x0, 22", "={s6}"()
+  %s7 = call i32 asm sideeffect "addi s7, x0, 23", "={s7}"()
+  %s8 = call i32 asm sideeffect "addi s8, x0, 24", "={s8}"()
+  %s9 = call i32 asm sideeffect "addi s9, x0, 25", "={s9}"()
+  %s10 = call i32 asm sideeffect "addi s10, x0, 26", "={s10}"()
+  %s11 = call i32 asm sideeffect "addi s11, x0, 27", "={s11}"()
+  %t3 = call i32 asm sideeffect "addi t3, x0, 28", "={t3}"()
+  %t4 = call i32 asm sideeffect "addi t4, x0, 29", "={t4}"()
+  %t5 = call i32 asm sideeffect "addi t5, x0, 30", "={t5}"()
+  %t6 = call i32 asm sideeffect "addi t6, x0, 31", "={t6}"()
+
+  %cmp = icmp eq i32 %t5, %t6
+  br i1 %cmp, label %branch_1, label %branch_2
+
+branch_1:
+  call void asm sideeffect ".space 1048576", ""()
+  br label %branch_2
+
+branch_2:
+  call void asm sideeffect "# reg use $0", "{ra}"(i32 %ra)
+  call void asm sideeffect "# reg use $0", "{t0}"(i32 %t0)
+  call void asm sideeffect "# reg use $0", "{t1}"(i32 %t1)
+  call void asm sideeffect "# reg use $0", "{t2}"(i32 %t2)
+  call void asm sideeffect "# reg use $0", "{s0}"(i32 %s0)
+  call void asm sideeffect "# reg use $0", "{s1}"(i32 %s1)
+  call void asm sideeffect "# reg use $0", "{a0}"(i32 %a0)
+  call void asm sideeffect "# reg use $0", "{a1}"(i32 %a1)
+  call void asm sideeffect "# reg use $0", "{a2}"(i32 %a2)
+  call void asm sideeffect "# reg use $0", "{a3}"(i32 %a3)
+  call void asm sideeffect "# reg use $0", "{a4}"(i32 %a4)
$0", "{a5}"(i32 %a5) + call void asm sideeffect "# reg use $0", "{a6}"(i32 %a6) + call void asm sideeffect "# reg use $0", "{a7}"(i32 %a7) + call void asm sideeffect "# reg use $0", "{s2}"(i32 %s2) + call void asm sideeffect "# reg use $0", "{s3}"(i32 %s3) + call void asm sideeffect "# reg use $0", "{s4}"(i32 %s4) + call void asm sideeffect "# reg use $0", "{s5}"(i32 %s5) + call void asm sideeffect "# reg use $0", "{s6}"(i32 %s6) + call void asm sideeffect "# reg use $0", "{s7}"(i32 %s7) + call void asm sideeffect "# reg use $0", "{s8}"(i32 %s8) + call void asm sideeffect "# reg use $0", "{s9}"(i32 %s9) + call void asm sideeffect "# reg use $0", "{s10}"(i32 %s10) + call void asm sideeffect "# reg use $0", "{s11}"(i32 %s11) + call void asm sideeffect "# reg use $0", "{t3}"(i32 %t3) + call void asm sideeffect "# reg use $0", "{t4}"(i32 %t4) + call void asm sideeffect "# reg use $0", "{t5}"(i32 %t5) + call void asm sideeffect "# reg use $0", "{t6}"(i32 %t6) + + ret void +} diff --git a/llvm/test/CodeGen/RISCV/branch-relaxation-spill-64.ll b/llvm/test/CodeGen/RISCV/branch-relaxation-spill-64.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/branch-relaxation-spill-64.ll @@ -0,0 +1,318 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s +; RUN: llc -mtriple=riscv64 -relocation-model=pic -verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +define void @relax_spill() { +; CHECK-LABEL: relax_spill: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -2032 +; CHECK-NEXT: .cfi_def_cfa_offset 2032 +; CHECK-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 2008(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s2, 2000(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s3, 1992(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s4, 1984(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s5, 1976(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s6, 1968(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s7, 1960(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s8, 1952(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s9, 1944(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s10, 1936(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s11, 1928(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: .cfi_offset s0, -16 +; CHECK-NEXT: .cfi_offset s1, -24 +; CHECK-NEXT: .cfi_offset s2, -32 +; CHECK-NEXT: .cfi_offset s3, -40 +; CHECK-NEXT: .cfi_offset s4, -48 +; CHECK-NEXT: .cfi_offset s5, -56 +; CHECK-NEXT: .cfi_offset s6, -64 +; CHECK-NEXT: .cfi_offset s7, -72 +; CHECK-NEXT: .cfi_offset s8, -80 +; CHECK-NEXT: .cfi_offset s9, -88 +; CHECK-NEXT: .cfi_offset s10, -96 +; CHECK-NEXT: .cfi_offset s11, -104 +; CHECK-NEXT: addi s0, sp, 2032 +; CHECK-NEXT: .cfi_def_cfa s0, 0 +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: addiw a0, a0, -2032 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: srli a0, sp, 12 +; CHECK-NEXT: slli sp, a0, 12 +; CHECK-NEXT: #APP +; CHECK-NEXT: li ra, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: li t0, 5 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: li t1, 6 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: li t2, 7 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: li s0, 8 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: li s1, 9 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: li a0, 10 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: #APP +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: #NO_APP +; 
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li a2, 12
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li a3, 13
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li a4, 14
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li a5, 15
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li a6, 16
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li a7, 17
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s2, 18
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s3, 19
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s4, 20
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s5, 21
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s6, 22
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s7, 23
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s8, 24
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s9, 25
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s10, 26
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li s11, 27
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li t3, 28
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li t4, 29
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li t5, 30
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li t6, 31
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    beq t5, t6, .LBB0_1
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    sd s11, 0(sp)
+; CHECK-NEXT:    jump .LBB0_4, s11
+; CHECK-NEXT:  .LBB0_1: # %branch_1
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    .zero 1048576
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    j .LBB0_2
+; CHECK-NEXT:  .LBB0_4: # %branch_2
+; CHECK-NEXT:    ld s11, 0(sp)
+; CHECK-NEXT:  .LBB0_2: # %branch_2
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use ra
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use t0
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use t1
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use t2
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s0
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s1
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use a0
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use a1
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use a2
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use a3
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use a4
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use a5
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use a6
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use a7
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s2
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s3
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s4
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s5
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s6
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s7
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s8
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s9
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s10
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use s11
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use t3
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use t4
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use t5
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    # reg use t6
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    sub sp, s0, a0
+; CHECK-NEXT:    lui a0, 2
+; CHECK-NEXT:    addiw a0, a0, -2032
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s1, 2008(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s2, 2000(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s3, 1992(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s4, 1984(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s5, 1976(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s6, 1968(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s7, 1960(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s8, 1952(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s9, 1944(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s10, 1936(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s11, 1928(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 2032
+; CHECK-NEXT:    ret
+
+  ; If the stack is large and the offset of BranchRelaxationScratchFrameIndex
+  ; is outside the range of a 12-bit signed integer, check that the spill
+  ; slot is adjusted to be close to the stack base register.
+  %stack_obj = alloca i64, align 4096
+
+  %ra = call i64 asm sideeffect "addi ra, x0, 1", "={ra}"()
+  %t0 = call i64 asm sideeffect "addi t0, x0, 5", "={t0}"()
+  %t1 = call i64 asm sideeffect "addi t1, x0, 6", "={t1}"()
+  %t2 = call i64 asm sideeffect "addi t2, x0, 7", "={t2}"()
+  %s0 = call i64 asm sideeffect "addi s0, x0, 8", "={s0}"()
+  %s1 = call i64 asm sideeffect "addi s1, x0, 9", "={s1}"()
+  %a0 = call i64 asm sideeffect "addi a0, x0, 10", "={a0}"()
+  %a1 = call i64 asm sideeffect "addi a1, x0, 11", "={a1}"()
+  %a2 = call i64 asm sideeffect "addi a2, x0, 12", "={a2}"()
+  %a3 = call i64 asm sideeffect "addi a3, x0, 13", "={a3}"()
+  %a4 = call i64 asm sideeffect "addi a4, x0, 14", "={a4}"()
+  %a5 = call i64 asm sideeffect "addi a5, x0, 15", "={a5}"()
+  %a6 = call i64 asm sideeffect "addi a6, x0, 16", "={a6}"()
+  %a7 = call i64 asm sideeffect "addi a7, x0, 17", "={a7}"()
+  %s2 = call i64 asm sideeffect "addi s2, x0, 18", "={s2}"()
+  %s3 = call i64 asm sideeffect "addi s3, x0, 19", "={s3}"()
+  %s4 = call i64 asm sideeffect "addi s4, x0, 20", "={s4}"()
+  %s5 = call i64 asm sideeffect "addi s5, x0, 21", "={s5}"()
+  %s6 = call i64 asm sideeffect "addi s6, x0, 22", "={s6}"()
+  %s7 = call i64 asm sideeffect "addi s7, x0, 23", "={s7}"()
+  %s8 = call i64 asm sideeffect "addi s8, x0, 24", "={s8}"()
+  %s9 = call i64 asm sideeffect "addi s9, x0, 25", "={s9}"()
+  %s10 = call i64 asm sideeffect "addi s10, x0, 26", "={s10}"()
+  %s11 = call i64 asm sideeffect "addi s11, x0, 27", "={s11}"()
+  %t3 = call i64 asm sideeffect "addi t3, x0, 28", "={t3}"()
+  %t4 = call i64 asm sideeffect "addi t4, x0, 29", "={t4}"()
+  %t5 = call i64 asm sideeffect "addi t5, x0, 30", "={t5}"()
+  %t6 = call i64 asm sideeffect "addi t6, x0, 31", "={t6}"()
+
+  %cmp = icmp eq i64 %t5, %t6
+  br i1 %cmp, label %branch_1, label %branch_2
+
+branch_1:
+  call void asm sideeffect ".space 1048576", ""()
+  br label %branch_2
+
+branch_2:
+  call void asm sideeffect "# reg use $0", "{ra}"(i64 %ra)
+  call void asm sideeffect "# reg use $0", "{t0}"(i64 %t0)
+  call void asm sideeffect "# reg use $0", "{t1}"(i64 %t1)
+  call void asm sideeffect "# reg use $0", "{t2}"(i64 %t2)
+  call void asm sideeffect "# reg use $0", "{s0}"(i64 %s0)
+  call void asm sideeffect "# reg use $0", "{s1}"(i64 %s1)
sideeffect "# reg use $0", "{a0}"(i64 %a0) + call void asm sideeffect "# reg use $0", "{a1}"(i64 %a1) + call void asm sideeffect "# reg use $0", "{a2}"(i64 %a2) + call void asm sideeffect "# reg use $0", "{a3}"(i64 %a3) + call void asm sideeffect "# reg use $0", "{a4}"(i64 %a4) + call void asm sideeffect "# reg use $0", "{a5}"(i64 %a5) + call void asm sideeffect "# reg use $0", "{a6}"(i64 %a6) + call void asm sideeffect "# reg use $0", "{a7}"(i64 %a7) + call void asm sideeffect "# reg use $0", "{s2}"(i64 %s2) + call void asm sideeffect "# reg use $0", "{s3}"(i64 %s3) + call void asm sideeffect "# reg use $0", "{s4}"(i64 %s4) + call void asm sideeffect "# reg use $0", "{s5}"(i64 %s5) + call void asm sideeffect "# reg use $0", "{s6}"(i64 %s6) + call void asm sideeffect "# reg use $0", "{s7}"(i64 %s7) + call void asm sideeffect "# reg use $0", "{s8}"(i64 %s8) + call void asm sideeffect "# reg use $0", "{s9}"(i64 %s9) + call void asm sideeffect "# reg use $0", "{s10}"(i64 %s10) + call void asm sideeffect "# reg use $0", "{s11}"(i64 %s11) + call void asm sideeffect "# reg use $0", "{t3}"(i64 %t3) + call void asm sideeffect "# reg use $0", "{t4}"(i64 %t4) + call void asm sideeffect "# reg use $0", "{t5}"(i64 %t5) + call void asm sideeffect "# reg use $0", "{t6}"(i64 %t6) + + ret void +} diff --git a/llvm/test/CodeGen/RISCV/branch-relaxation.ll b/llvm/test/CodeGen/RISCV/branch-relaxation.ll --- a/llvm/test/CodeGen/RISCV/branch-relaxation.ll +++ b/llvm/test/CodeGen/RISCV/branch-relaxation.ll @@ -32,9 +32,10 @@ define i32 @relax_jal(i1 %a) nounwind { ; CHECK-LABEL: relax_jal: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: bnez a0, .LBB1_1 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: # %bb.4: ; CHECK-NEXT: jump .LBB1_2, a0 ; CHECK-NEXT: .LBB1_1: # %iftrue ; CHECK-NEXT: #APP @@ -42,12 +43,13 @@ ; CHECK-NEXT: #APP ; CHECK-NEXT: .zero 1048576 ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: ret +; CHECK-NEXT: j .LBB1_3 ; CHECK-NEXT: .LBB1_2: # %jmp ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP +; CHECK-NEXT: .LBB1_3: # %tail ; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret br i1 %a, label %iftrue, label %jmp