Index: llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -435,22 +435,19 @@ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); - // If we are a leaf function, and use up to 224 bytes of stack space, - // don't have a frame pointer, calls, or dynamic alloca then we do not need - // to adjust the stack pointer (we fit in the Red Zone). - // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate - // stackless code if all local vars are reg-allocated. - bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); unsigned LR = RegInfo->getRARegister(); - if (!DisableRedZone && - (Subtarget.isPPC64() || // 32-bit SVR4, no stack- - !Subtarget.isSVR4ABI() || // allocated locals. - FrameSize == 0) && - FrameSize <= 224 && // Fits in red zone. - !MFI.hasVarSizedObjects() && // No dynamic alloca. - !MFI.adjustsStack() && // No calls. - !MustSaveLR(MF, LR) && - !RegInfo->hasBasePointer(MF)) { // No special alignment. + bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); + bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. + !MFI.adjustsStack() && // No calls. + !MustSaveLR(MF, LR) && // No need to save LR. + !RegInfo->hasBasePointer(MF); // No special alignment. + + // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless + // code if all local vars are reg-allocated. + bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); + + // Check whether we can skip adjusting the stack pointer (by using red zone) + if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { // No need for frame if (UpdateMF) MFI.setStackSize(0); @@ -1869,8 +1866,13 @@ } if (HasVRSaveArea) { - // Insert alignment padding, we need 16-byte alignment. 
- LowerBound = (LowerBound - 15) & ~(15); + // Insert alignment padding, we need 16-byte alignment. Note: for a positive + // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since + // we are using a negative number here (the stack grows downward), we should + // use the formula : y = x & (~(n-1)), where x is the size before aligning, n + // is the alignment size ( n = 16 here) and y is the size after aligning. + assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!"); + LowerBound &= ~(15); for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { int FI = VRegs[i].getFrameIdx(); Index: llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h +++ llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h @@ -272,6 +272,13 @@ return 16; } + + // DarwinABI has a 224-byte red zone. PPC32 SVR4ABI(Non-DarwinABI) has no + // red zone and PPC64 SVR4ABI has a 288-byte red zone. + unsigned getRedZoneSize() const { + return isDarwinABI() ? 224 : (isPPC64() ? 288 : 0); + } + bool hasHTM() const { return HasHTM; } bool hasFusion() const { return HasFusion; } bool hasFloat128() const { return HasFloat128; } Index: llvm/trunk/test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll +++ llvm/trunk/test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll @@ -0,0 +1,32 @@ +; Note the formula for negative number alignment calculation should be y = x & ~(n-1) rather than y = (x + (n-1)) & ~(n-1). +; after patch https://reviews.llvm.org/D34337, we could save 16 bytes in the best case. 
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE + +define signext i32 @bar(i32 signext %ii) { +entry: + %0 = tail call i32 asm sideeffect "add $0, $1, $2\0A", "=r,r,r,~{f14},~{r15},~{v20}"(i32 %ii, i32 10) + ret i32 %0 +; Before the fix by patch D34337: +; stdu 1, -544(1) +; std 15, 264(1) +; stfd 14, 400(1) +; stdu 1, -560(1) +; std 15, 280(1) +; stfd 14, 416(1) + +; After the fix by patch D34337: +; CHECK-LE: stdu 1, -528(1) +; CHECK-LE:std 15, 248(1) +; CHECK-LE:stfd 14, 384(1) +; CHECK-BE: stdu 1, -544(1) +; CHECK-BE:std 15, 264(1) +; CHECK-BE:stfd 14, 400(1) +} + +define signext i32 @foo() { +entry: + %call = tail call signext i32 @bar(i32 signext 5) + ret i32 %call +} + Index: llvm/trunk/test/CodeGen/PowerPC/svr4-redzone.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/svr4-redzone.ll +++ llvm/trunk/test/CodeGen/PowerPC/svr4-redzone.ll @@ -29,11 +29,11 @@ define i8* @bigstack() nounwind { entry: - %0 = alloca i8, i32 230 + %0 = alloca i8, i32 290 ret i8* %0 } ; PPC32-LABEL: bigstack: -; PPC32: stwu 1, -240(1) +; PPC32: stwu 1, -304(1) ; PPC64-LABEL: bigstack: -; PPC64: stdu 1, -288(1) +; PPC64: stdu 1, -352(1) Index: llvm/trunk/test/CodeGen/PowerPC/tailcall1-64.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/tailcall1-64.ll +++ llvm/trunk/test/CodeGen/PowerPC/tailcall1-64.ll @@ -1,4 +1,5 @@ ; RUN: llc -relocation-model=static -verify-machineinstrs < %s -march=ppc64 -tailcallopt | grep TC_RETURNd8 +; RUN: llc -relocation-model=static -verify-machineinstrs -march=ppc64 < %s | FileCheck %s define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { entry: ret i32 %a3 @@ -6,6 +7,9 @@ define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { 
entry: - %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; [#uses=1] + %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ret i32 %tmp11 +; CHECK-LABEL: tailcaller +; CHECK-NOT: stdu +; CHECK: b tailcallee }