diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -1077,7 +1077,26 @@ // If the frame pointer is eliminated, all frame offsets will be relative to // SP not FP. Align to MaxAlign so this works. StackAlign = std::max(StackAlign, MaxAlign); + int64_t OffsetBeforeAlignment = Offset; Offset = alignTo(Offset, StackAlign, Skew); + + // If we have increased the offset to fulfill the alignment constraints, + // then the scavenging spill slots may become harder to reach from the + // stack pointer, float them so they stay close. + if (OffsetBeforeAlignment != Offset && RS && !EarlyScavengingSlots) { + SmallVector<int, 2> SFIs; + RS->getScavengingFrameIndices(SFIs); + LLVM_DEBUG(if (!SFIs.empty()) llvm::dbgs() + << "Adjusting emergency spill slots!\n";); + int64_t Delta = Offset - OffsetBeforeAlignment; + for (SmallVectorImpl<int>::iterator I = SFIs.begin(), IE = SFIs.end(); + I != IE; ++I) { + LLVM_DEBUG(llvm::dbgs() << "Adjusting offset of emergency spill slot #" + << *I << " from " << MFI.getObjectOffset(*I);); + MFI.setObjectOffset(*I, MFI.getObjectOffset(*I) - Delta); + LLVM_DEBUG(llvm::dbgs() << " to " << MFI.getObjectOffset(*I) << "\n";); + } + } } // Update frame info to pretend that this is part of the stack... 
diff --git a/llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir --- a/llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir @@ -5,10 +5,10 @@ name: LateScavengingSlotRealignment # CHECK-LABEL: name: LateScavengingSlotRealignment # CHECK: bb.0: -# CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $sp, 3 +# CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $sp, 0 # CHECK-NEXT: $[[SCRATCH]] = ADDXri $sp, 40, 0 # CHECK-NEXT: STRXui $x0, killed $[[SCRATCH]], 4095 -# CHECK-NEXT: $[[SCRATCH]] = LDRXui $sp, 3 +# CHECK-NEXT: $[[SCRATCH]] = LDRXui $sp, 0 # CHECK: bb.1: tracksRegLiveness: true frameInfo: diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir --- a/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir @@ -7,7 +7,7 @@ # CHECK: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 8, 12 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 -# CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $sp, 1 +# CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $sp, 0 # CHECK-NEXT: $[[SCRATCH]] = ADDVL_XXI $fp, -1 # CHECK-NEXT: STRXui $x0, killed $[[SCRATCH]], 0 # CHECK: bb.1: diff --git a/llvm/test/CodeGen/AArch64/swiftself-scavenger.ll b/llvm/test/CodeGen/AArch64/swiftself-scavenger.ll --- a/llvm/test/CodeGen/AArch64/swiftself-scavenger.ll +++ b/llvm/test/CodeGen/AArch64/swiftself-scavenger.ll @@ -2,10 +2,10 @@ ; Check that we reserve an emergency spill slot, even if we added an extra ; CSR spill for the values used by the swiftself parameter. 
; CHECK-LABEL: func: -; CHECK: str [[REG:x[0-9]+]], [sp, #8] +; CHECK: str [[REG:x[0-9]+]], [sp] ; CHECK: add [[REG]], sp, #248 ; CHECK: str xzr, [{{\s*}}[[REG]], #32760] -; CHECK: ldr [[REG]], [sp, #8] +; CHECK: ldr [[REG]], [sp] target triple = "arm64-apple-ios" @ptr8 = external global i8* diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir @@ -30,16 +30,14 @@ ; GFX8: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc ; GFX8: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc ; GFX8: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec - ; GFX8: $sgpr4 = S_ADD_U32 $sgpr33, 524544, implicit-def $scc - ; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) + ; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) ; GFX8: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX8: $vcc_lo = S_MOV_B32 8192 ; GFX8: $vgpr3, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec ; GFX8: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec ; GFX8: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc ; GFX8: $sgpr33 = V_READLANE_B32 $vgpr2, 0 - ; GFX8: $sgpr4 = S_ADD_U32 $sgpr33, 524544, implicit-def $scc - ; GFX8: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) + ; GFX8: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) ; GFX8: S_ENDPGM 0, csr_amdgpu_allvgprs ; GFX9-LABEL: name: pei_scavenge_vgpr_spill 
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 
@@ -48,15 +46,13 @@ ; GFX9: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc ; GFX9: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc ; GFX9: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec - ; GFX9: $sgpr4 = S_ADD_U32 $sgpr33, 524544, implicit-def $scc - ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) + ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) ; GFX9: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX9: $vgpr3 = V_ADD_U32_e32 8192, killed $vgpr3, implicit $exec ; GFX9: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec ; GFX9: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc ; GFX9: $sgpr33 = V_READLANE_B32 $vgpr2, 0 - ; GFX9: $sgpr4 = S_ADD_U32 $sgpr33, 524544, implicit-def $scc - ; GFX9: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) + ; GFX9: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) ; GFX9: S_ENDPGM 0, csr_amdgpu_allvgprs ; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill ; GFX9-FLATSCR: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 diff --git a/llvm/test/CodeGen/RISCV/out-of-reach-emergency-slot.mir b/llvm/test/CodeGen/RISCV/out-of-reach-emergency-slot.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/out-of-reach-emergency-slot.mir @@ -0,0 +1,78 @@ +# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +# REQUIRES: asserts +# RUN: llc -mtriple riscv64 -start-before=prologepilog -o - \ +# RUN: -verify-machineinstrs %s | FileCheck %s +# +# RUN: llc -mtriple riscv64 -start-before=prologepilog -o /dev/null \ +# RUN: -debug-only=prologepilog 
-verify-machineinstrs %s 2>&1 \ +# RUN: | FileCheck --check-prefix=DEBUG %s +# +# DEBUG: Adjusting emergency spill slots! +# DEBUG: Adjusting offset of emergency spill slot #4 from -4112 to -8192 + +--- | + ; ModuleID = 'reduced.ll' + source_filename = "frame_layout-1253b1.cpp" + target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" + target triple = "riscv64" + + ; Function Attrs: nounwind + define weak_odr dso_local void @foo(i8* %ay) nounwind { + ; CHECK-LABEL: foo: + ; CHECK: # %bb.0: # %entry + ; CHECK-NEXT: addi sp, sp, -2032 + ; CHECK-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill + ; CHECK-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill + ; CHECK-NEXT: addi s0, sp, 2032 + ; CHECK-NEXT: sd a1, 0(sp) + ; CHECK-NEXT: lui a1, 2 + ; CHECK-NEXT: addiw a1, a1, -2032 + ; CHECK-NEXT: sub sp, sp, a1 + ; CHECK-NEXT: srli a1, sp, 12 + ; CHECK-NEXT: slli sp, a1, 12 + ; CHECK-NEXT: lui a1, 1 + ; CHECK-NEXT: addiw a1, a1, -8 + ; CHECK-NEXT: add a1, sp, a1 + ; CHECK-NEXT: sd a0, 0(a1) + ; CHECK-NEXT: ld a1, 0(sp) + ; CHECK-NEXT: call foo@plt + ; CHECK-NEXT: lui a0, 2 + ; CHECK-NEXT: sub sp, s0, a0 + ; CHECK-NEXT: lui a0, 2 + ; CHECK-NEXT: addiw a0, a0, -2032 + ; CHECK-NEXT: add sp, sp, a0 + ; CHECK-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload + ; CHECK-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload + ; CHECK-NEXT: addi sp, sp, 2032 + ; CHECK-NEXT: ret + entry: + ret void + } + + +... +--- +name: foo +alignment: 2 +tracksRegLiveness: false +frameInfo: + maxAlignment: 4096 +stack: + - { id: 0, size: 8, alignment: 4096 } + - { id: 1, type: spill-slot, size: 8, alignment: 8 } +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x1, $x5, $x6, $x7, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x28, $x29, $x30, $x31 + + ; This is to store something to the (non-emergency) spill slot %stack.1. + SD $x10, %stack.1, 0 + ; This is here just to make all the eligible registers live at this point. 
+ ; This way when we replace the frame index %stack.1 with its actual address + ; we have to allocate a virtual register to compute it. + ; A later run of the register scavenger won't find an available register + ; either so it will have to spill one to the emergency spill slot. + PseudoCALL target-flags(riscv-plt) @foo, csr_ilp32_lp64, implicit-def $x1, implicit-def $x2, implicit $x1, implicit $x5, implicit $x6, implicit $x7, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x28, implicit $x29, implicit $x30, implicit $x31 + PseudoRET + +... diff --git a/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll b/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll --- a/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll +++ b/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll @@ -253,10 +253,10 @@ ; CHECK-NEXT: adds r1, #1 ; CHECK-NEXT: @APP ; CHECK-NEXT: @NO_APP -; CHECK-NEXT: str r0, [sp, #12] +; CHECK-NEXT: str r0, [sp] ; CHECK-NEXT: ldr r0, .LCPI5_0 ; CHECK-NEXT: str r5, [r0, r7] -; CHECK-NEXT: ldr r0, [sp, #12] +; CHECK-NEXT: ldr r0, [sp] ; CHECK-NEXT: @APP ; CHECK-NEXT: @NO_APP ; CHECK-NEXT: subs r4, r7, #7