diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -488,15 +488,6 @@ MFFrame.setStackSize(StackSize); if (StackSize) { - // Determine if we want to store a backchain. - bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain"); - - // If we need backchain, save current stack pointer. R1 is free at this - // point. - if (StoreBackchain) - BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR)) - .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D); - // Allocate StackSize bytes. int64_t Delta = -int64_t(StackSize); const unsigned ProbeSize = TLI.getStackProbeSize(MF); @@ -512,18 +503,23 @@ .addImm(StackSize); } else { + bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain"); + // If we need backchain, save current stack pointer. R1 is free at + // this point. + if (StoreBackchain) + BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR)) + .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D); emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII); buildCFAOffs(MBB, MBBI, DL, SPOffsetFromCFA + Delta, ZII); + if (StoreBackchain) { + // The back chain is stored topmost with packed-stack. + int Offset = usePackedStack(MF) ? SystemZMC::CallFrameSize - 8 : 0; + BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG)) + .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D) + .addImm(Offset).addReg(0); + } } SPOffsetFromCFA += Delta; - - if (StoreBackchain) { - // The back chain is stored topmost with packed-stack. - int Offset = usePackedStack(MF) ? SystemZMC::CallFrameSize - 8 : 0; - BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG)) - .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D) - .addImm(Offset).addReg(0); - } } if (HasFP) { @@ -668,6 +664,13 @@ .addMemOperand(MMO); }; + bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain"); + if (StoreBackchain) + BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR)) + .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D); + + MachineBasicBlock *DoneMBB = nullptr; + MachineBasicBlock *LoopMBB = nullptr; if (NumFullBlocks < 3) { // Emit unrolled probe statements. for (unsigned int i = 0; i < NumFullBlocks; i++) @@ -677,15 +680,16 @@ uint64_t LoopAlloc = ProbeSize * NumFullBlocks; SPOffsetFromCFA -= LoopAlloc; - BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R1D) + // Use R0D to hold the exit value. + BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R0D) .addReg(SystemZ::R15D); - buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R1D, ZII); - emitIncrement(*MBB, MBBI, DL, SystemZ::R1D, -int64_t(LoopAlloc), ZII); + buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R0D, ZII); + emitIncrement(*MBB, MBBI, DL, SystemZ::R0D, -int64_t(LoopAlloc), ZII); buildCFAOffs(*MBB, MBBI, DL, -int64_t(SystemZMC::CallFrameSize + LoopAlloc), ZII); - MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MBBI, MBB); - MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB); + DoneMBB = SystemZ::splitBlockBefore(MBBI, MBB); + LoopMBB = SystemZ::emitBlockAfter(MBB); MBB->addSuccessor(LoopMBB); LoopMBB->addSuccessor(LoopMBB); LoopMBB->addSuccessor(DoneMBB); @@ -693,22 +697,32 @@ MBB = LoopMBB; allocateAndProbe(*MBB, MBB->end(), ProbeSize, false/*EmitCFI*/); BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::CLGR)) - .addReg(SystemZ::R15D).addReg(SystemZ::R1D); + .addReg(SystemZ::R15D).addReg(SystemZ::R0D); BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_GT).addMBB(MBB); MBB = DoneMBB; MBBI = DoneMBB->begin(); buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R15D, ZII); - - recomputeLiveIns(*DoneMBB); - recomputeLiveIns(*LoopMBB); } if (Residual) allocateAndProbe(*MBB, MBBI, Residual, true/*EmitCFI*/); + if (StoreBackchain) { + // The back chain is stored topmost with packed-stack. + int Offset = usePackedStack(MF) ? SystemZMC::CallFrameSize - 8 : 0; + BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::STG)) + .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D) + .addImm(Offset).addReg(0); + } + StackAllocMI->eraseFromParent(); + if (DoneMBB != nullptr) { + // Compute the live-in lists for the new blocks. + recomputeLiveIns(*DoneMBB); + recomputeLiveIns(*LoopMBB); + } } bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const { diff --git a/llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll --- a/llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll +++ b/llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -verify-machineinstrs | FileCheck %s define i32 @fun0(i32 %n) #0 { ; CHECK-LABEL: fun0: @@ -92,14 +92,14 @@ ; CHECK-NEXT: stmg %r11, %r15, 88(%r15) ; CHECK-NEXT: .cfi_offset %r11, -72 ; CHECK-NEXT: .cfi_offset %r15, -40 -; CHECK-NEXT: lgr %r1, %r15 -; CHECK-NEXT: .cfi_def_cfa_register %r1 -; CHECK-NEXT: aghi %r1, -160 +; CHECK-NEXT: lgr %r0, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r0 +; CHECK-NEXT: aghi %r0, -160 ; CHECK-NEXT: .cfi_def_cfa_offset 320 ; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: aghi %r15, -8 ; CHECK-NEXT: cg %r0, 0(%r15) -; CHECK-NEXT: clgrjh %r15, %r1, .LBB2_1 +; CHECK-NEXT: clgrjh %r15, %r0, .LBB2_1 ; CHECK-NEXT: # %bb.2: ; CHECK-NEXT: .cfi_def_cfa_register %r15 ; CHECK-NEXT: lgr %r11, %r15 diff --git a/llvm/test/CodeGen/SystemZ/stack-clash-protection.ll b/llvm/test/CodeGen/SystemZ/stack-clash-protection.ll --- a/llvm/test/CodeGen/SystemZ/stack-clash-protection.ll +++ b/llvm/test/CodeGen/SystemZ/stack-clash-protection.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 -verify-machineinstrs | FileCheck %s ; ; Test stack clash protection probing for static allocas. @@ -48,14 +48,14 @@ define i32 @fun2() #0 { ; CHECK-LABEL: fun2: ; CHECK: # %bb.0: -; CHECK-NEXT: lgr %r1, %r15 -; CHECK-NEXT: .cfi_def_cfa_register %r1 -; CHECK-NEXT: agfi %r1, -69632 +; CHECK-NEXT: lgr %r0, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r0 +; CHECK-NEXT: agfi %r0, -69632 ; CHECK-NEXT: .cfi_def_cfa_offset 69792 ; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: aghi %r15, -4096 ; CHECK-NEXT: cg %r0, 4088(%r15) -; CHECK-NEXT: clgrjh %r15, %r1, .LBB2_1 +; CHECK-NEXT: clgrjh %r15, %r0, .LBB2_1 ; CHECK-NEXT: # %bb.2: ; CHECK-NEXT: .cfi_def_cfa_register %r15 ; CHECK-NEXT: aghi %r15, -2544 @@ -81,15 +81,15 @@ define void @fun3() #0 { ; CHECK-LABEL: fun3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lgr %r1, %r15 -; CHECK-NEXT: .cfi_def_cfa_register %r1 -; CHECK-NEXT: aghi %r1, -28672 +; CHECK-NEXT: lgr %r0, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r0 +; CHECK-NEXT: aghi %r0, -28672 ; CHECK-NEXT: .cfi_def_cfa_offset 28832 ; CHECK-NEXT: .LBB3_1: # %entry ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: aghi %r15, -4096 ; CHECK-NEXT: cg %r0, 4088(%r15) -; CHECK-NEXT: clgrjh %r15, %r1, .LBB3_1 +; CHECK-NEXT: clgrjh %r15, %r0, .LBB3_1 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: .cfi_def_cfa_register %r15 ; CHECK-NEXT: mvhi 180(%r15), 0 @@ -110,15 +110,15 @@ define void @fun4() #0 "stack-probe-size"="8192" { ; CHECK-LABEL: fun4: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lgr %r1, %r15 -; CHECK-NEXT: .cfi_def_cfa_register %r1 -; CHECK-NEXT: aghi %r1, -24576 +; CHECK-NEXT: lgr %r0, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r0 +; CHECK-NEXT: aghi %r0, -24576 ; CHECK-NEXT: .cfi_def_cfa_offset 24736 ; CHECK-NEXT: .LBB4_1: # %entry ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: aghi %r15, -8192 ; CHECK-NEXT: cg %r0, 8184(%r15) -; CHECK-NEXT: clgrjh %r15, %r1, .LBB4_1 +; CHECK-NEXT: clgrjh %r15, %r0, .LBB4_1 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: .cfi_def_cfa_register %r15 ; CHECK-NEXT: aghi %r15, -7608 @@ -166,15 +166,15 @@ define void @fun6() #0 "stack-probe-size"="5" { ; CHECK-LABEL: fun6: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lgr %r1, %r15 -; CHECK-NEXT: .cfi_def_cfa_register %r1 -; CHECK-NEXT: aghi %r1, -4184 +; CHECK-NEXT: lgr %r0, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r0 +; CHECK-NEXT: aghi %r0, -4184 ; CHECK-NEXT: .cfi_def_cfa_offset 4344 ; CHECK-NEXT: .LBB6_1: # %entry ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: aghi %r15, -8 ; CHECK-NEXT: cg %r0, 0(%r15) -; CHECK-NEXT: clgrjh %r15, %r1, .LBB6_1 +; CHECK-NEXT: clgrjh %r15, %r0, .LBB6_1 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: .cfi_def_cfa_register %r15 ; CHECK-NEXT: mvhi 180(%r15), 0 @@ -237,6 +237,37 @@ ret i32 %c } +define void @fun9() #0 "backchain" { +; CHECK-LABEL: fun9: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lgr %r1, %r15 +; CHECK-NEXT: lgr %r0, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r0 +; CHECK-NEXT: aghi %r0, -28672 +; CHECK-NEXT: .cfi_def_cfa_offset 28832 +; CHECK-NEXT: .LBB9_1: # %entry +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: aghi %r15, -4096 +; CHECK-NEXT: cg %r0, 4088(%r15) +; CHECK-NEXT: clgrjh %r15, %r0, .LBB9_1 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: .cfi_def_cfa_register %r15 +; CHECK-NEXT: stg %r1, 0(%r15) +; CHECK-NEXT: mvhi 180(%r15), 0 +; CHECK-NEXT: l %r0, 180(%r15) +; CHECK-NEXT: aghi %r15, 28672 +; CHECK-NEXT: br %r14 +entry: + %stack = alloca [7122 x i32], align 4 + %i = alloca i32, align 4 + %0 = bitcast [7122 x i32]* %stack to i8* + %i.0.i.0..sroa_cast = bitcast i32* %i to i8* + store volatile i32 0, i32* %i, align 4 + %i.0.i.0.6 = load volatile i32, i32* %i, align 4 + ret void +} + + declare i32 @foo() attributes #0 = { "probe-stack"="inline-asm" }