Index: llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -926,7 +926,7 @@ } // Have we generated a STUX instruction to claim stack frame? If so, - // the frame size will be placed in ScratchReg. + // the negated frame size will be placed in ScratchReg. bool HasSTUX = false; // This condition must be kept in sync with canUseAsPrologue. @@ -986,33 +986,88 @@ if (!HasRedZone) { assert(!isPPC64 && "A red zone is always available on PPC64"); if (HasSTUX) { - // The frame size is in ScratchReg, and the SPReg has been advanced - // (downwards) by the frame size: SPReg = old SPReg + ScratchReg. - // Set ScratchReg to the original SPReg: ScratchReg = SPReg - ScratchReg. + // The negated frame size is in ScratchReg, and the SPReg has been + // decremented by the frame size: SPReg = old SPReg + ScratchReg. + // Since FPOffset, PBPOffset, etc. are relative to the beginning of + // the stack frame (i.e. the old SP), ideally, we would put the old + // SP into a register and use it as the base for the stores. The + // problem is that the only available register may be ScratchReg, + // which could be R0, and R0 cannot be used as a base address. + + // First, set ScratchReg to the old SP. This may need to be modified + // later. BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) .addReg(ScratchReg, RegState::Kill) .addReg(SPReg); - // Now that the stack frame has been allocated, save all the necessary - // registers using ScratchReg as the base address. - if (HasFP) - BuildMI(MBB, MBBI, dl, StoreInst) - .addReg(FPReg) - .addImm(FPOffset) - .addReg(ScratchReg); - if (FI->usesPICBase()) - BuildMI(MBB, MBBI, dl, StoreInst) - .addReg(PPC::R30) - .addImm(PBPOffset) - .addReg(ScratchReg); - if (HasBP) { - BuildMI(MBB, MBBI, dl, StoreInst) - .addReg(BPReg) - .addImm(BPOffset) - .addReg(ScratchReg); - BuildMI(MBB, MBBI, dl, OrInst, BPReg) - .addReg(ScratchReg, RegState::Kill) - .addReg(ScratchReg); + if (ScratchReg == PPC::R0) { + // R0 cannot be used as a base register, but it can be used as an + // index in a store-indexed. + int LastOffset = 0; + if (HasFP) { + // R0 += (FPOffset-LastOffset). + // Need addic, since addi treats R0 as 0. + BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) + .addReg(ScratchReg) + .addImm(FPOffset-LastOffset); + LastOffset = FPOffset; + // Store FP into *R0. + BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) + .addReg(FPReg, RegState::Kill) // Save FP. + .addReg(PPC::ZERO) + .addReg(ScratchReg); // This will be the index (R0 is ok here). + } + if (FI->usesPICBase()) { + // R0 += (PBPOffset-LastOffset). + BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) + .addReg(ScratchReg) + .addImm(PBPOffset-LastOffset); + LastOffset = PBPOffset; + BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) + .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. + .addReg(PPC::ZERO) + .addReg(ScratchReg); // This will be the index (R0 is ok here). + } + if (HasBP) { + // R0 += (BPOffset-LastOffset). + BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) + .addReg(ScratchReg) + .addImm(BPOffset-LastOffset); + LastOffset = BPOffset; + BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) + .addReg(BPReg, RegState::Kill) // Save BP. + .addReg(PPC::ZERO) + .addReg(ScratchReg); // This will be the index (R0 is ok here). + // BP = R0-LastOffset + BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) + .addReg(ScratchReg, RegState::Kill) + .addImm(-LastOffset); + } + } else { + // ScratchReg is not R0, so use it as the base register. It is + // already set to the old SP, so we can use the offsets directly. + + // Now that the stack frame has been allocated, save all the necessary + // registers using ScratchReg as the base address. + if (HasFP) + BuildMI(MBB, MBBI, dl, StoreInst) + .addReg(FPReg) + .addImm(FPOffset) + .addReg(ScratchReg); + if (FI->usesPICBase()) + BuildMI(MBB, MBBI, dl, StoreInst) + .addReg(PPC::R30) + .addImm(PBPOffset) + .addReg(ScratchReg); + if (HasBP) { + BuildMI(MBB, MBBI, dl, StoreInst) + .addReg(BPReg) + .addImm(BPOffset) + .addReg(ScratchReg); + BuildMI(MBB, MBBI, dl, OrInst, BPReg) + .addReg(ScratchReg, RegState::Kill) + .addReg(ScratchReg); + } } } else { // The frame size is a known 16-bit constant (fitting in the immediate @@ -1190,6 +1245,7 @@ // Do we have a frame pointer and/or base pointer for this function? bool HasFP = hasFP(MF); bool HasBP = RegInfo->hasBasePointer(MF); + bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; unsigned BPReg = RegInfo->getBaseRegister(MF); @@ -1202,6 +1258,8 @@ : PPC::LWZ ); const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 : PPC::LIS ); + const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 + : PPC::OR ); const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 : PPC::ORI ); const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 @@ -1223,7 +1281,6 @@ if (HasFP) { if (isSVR4ABI) { - MachineFrameInfo &MFI = MF.getFrameInfo(); int FPIndex = FI->getFramePointerSaveIndex(); assert(FPIndex && "No Frame Pointer Save Slot!"); FPOffset = MFI.getObjectOffset(FPIndex); @@ -1235,7 +1292,6 @@ int BPOffset = 0; if (HasBP) { if (isSVR4ABI) { - MachineFrameInfo &MFI = MF.getFrameInfo(); int BPIndex = FI->getBasePointerSaveIndex(); assert(BPIndex && "No Base Pointer Save Slot!"); BPOffset = MFI.getObjectOffset(BPIndex); @@ -1246,7 +1302,6 @@ int PBPOffset = 0; if (FI->usesPICBase()) { - MachineFrameInfo &MFI = MF.getFrameInfo(); int PBPIndex = FI->getPICBasePointerSaveIndex(); assert(PBPIndex && "No PIC Base Pointer Save Slot!"); PBPOffset = MFI.getObjectOffset(PBPIndex); @@ -1282,9 +1337,25 @@ // indexed into with a simple LD/LWZ immediate offset operand. bool isLargeFrame = !isInt<16>(FrameSize); + // On targets without red zone, the SP needs to be restored last, so that + // all live contents of the stack frame are upwards of the SP. This means + // that we cannot restore SP just now, since there may be more registers + // to restore from the stack frame (e.g. R31). If the frame size is not + // a simple immediate value, we will need a spare register to hold the + // restored SP. If the frame size is known and small, we can simply adjust + // the offsets of the registers to be restored, and still use SP to restore + // them. In such case, the final update of SP will be to add the frame + // size to it. + // To simplify the code, set RBReg to the base register used to restore + // values from the stack, and set SPAdd to the value that needs to be added + // to the SP at the end. The default values are as if red zone was present. + unsigned RBReg = SPReg; + unsigned SPAdd = 0; + if (FrameSize) { - // In the prologue, the loaded (or persistent) stack pointer value is offset - // by the STDU/STDUX/STWU/STWUX instruction. Add this offset back now. + // In the prologue, the loaded (or persistent) stack pointer value is + // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red + // zone add this offset back now. // If this function contained a fastcc call and GuaranteedTailCallOpt is // enabled (=> hasFastCall()==true) the fastcc call might contain a tail @@ -1292,8 +1363,10 @@ // value of R31 in this case. if (FI->hasFastCall()) { assert(HasFP && "Expecting a valid frame pointer."); + if (!HasRedZone) + RBReg = FPReg; if (!isLargeFrame) { - BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) + BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) .addReg(FPReg).addImm(FrameSize); } else { BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) @@ -1302,27 +1375,55 @@ .addReg(ScratchReg, RegState::Kill) .addImm(FrameSize & 0xFFFF); BuildMI(MBB, MBBI, dl, AddInst) - .addReg(SPReg) + .addReg(RBReg) .addReg(FPReg) .addReg(ScratchReg); } } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { - BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) - .addReg(SPReg) - .addImm(FrameSize); + if (HasRedZone) { + BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) + .addReg(SPReg) + .addImm(FrameSize); + } else { + // Make sure that adding FrameSize will not overflow the max offset + // size. + assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && + "Local offsets should be negative"); + SPAdd = FrameSize; + FPOffset += FrameSize; + BPOffset += FrameSize; + PBPOffset += FrameSize; + } } else { - BuildMI(MBB, MBBI, dl, LoadInst, SPReg) + // We don't want to use ScratchReg as a base register, because it + // could happen to be R0. Use FP instead, but make sure to preserve it. + if (!HasRedZone) { + // If FP is not saved, copy it to ScratchReg. + if (!HasFP) + BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) + .addReg(FPReg) + .addReg(FPReg); + RBReg = FPReg; + } + BuildMI(MBB, MBBI, dl, LoadInst, RBReg) .addImm(0) .addReg(SPReg); } } + assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); + // If there is no red zone, ScratchReg may be needed for holding a useful + // value (although not the base register). Make sure it is not overwritten + // too early. assert((isPPC64 || !MustSaveCR) && "Epilogue CR restoring supported only in 64-bit mode"); - // If we need to save both the LR and the CR and we only have one available - // scratch register, we must do them one at a time. + // If we need to restore both the LR and the CR and we only have one + // available scratch register, we must do them one at a time. if (MustSaveCR && SingleScratchReg && MustSaveLR) { + // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg + // is live here. + assert(HasRedZone && "Expecting red zone"); BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) .addImm(8) .addReg(SPReg); @@ -1331,33 +1432,77 @@ .addReg(TempReg, getKillRegState(i == e-1)); } - if (MustSaveLR) + // Delay restoring of the LR if ScratchReg is needed. This is ok, since + // LR is stored in the caller's stack frame. ScratchReg will be needed + // if RBReg is anything other than SP. We shouldn't use ScratchReg as + // a base register anyway, because it may happen to be R0. + bool LoadedLR = false; + if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) - .addImm(LROffset) - .addReg(SPReg); + .addImm(LROffset+SPAdd) + .addReg(RBReg); + LoadedLR = true; + } - if (MustSaveCR && - !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 + if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { + // This will only occur for PPC64. + assert(isPPC64 && "Expecting 64-bit mode"); + assert(RBReg == SPReg && "Should be using SP as a base register"); BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) .addImm(8) - .addReg(SPReg); + .addReg(RBReg); + } - if (HasFP) - BuildMI(MBB, MBBI, dl, LoadInst, FPReg) - .addImm(FPOffset) - .addReg(SPReg); + if (HasFP) { + // If there is red zone, restore FP directly, since SP has already been + // restored. Otherwise, restore the value of FP into ScratchReg. + if (HasRedZone || RBReg == SPReg) + BuildMI(MBB, MBBI, dl, LoadInst, FPReg) + .addImm(FPOffset) + .addReg(SPReg); + else + BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) + .addImm(FPOffset) + .addReg(RBReg); + } if (FI->usesPICBase()) - // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. BuildMI(MBB, MBBI, dl, LoadInst) .addReg(PPC::R30) .addImm(PBPOffset) - .addReg(SPReg); + .addReg(RBReg); if (HasBP) BuildMI(MBB, MBBI, dl, LoadInst, BPReg) .addImm(BPOffset) - .addReg(SPReg); + .addReg(RBReg); + + // There is nothing more to be loaded from the stack, so now we can + // restore SP: SP = RBReg + SPAdd. + if (RBReg != SPReg || SPAdd != 0) { + assert(!HasRedZone && "This should not happen with red zone"); + // If SPAdd is 0, generate a copy. + if (SPAdd == 0) + BuildMI(MBB, MBBI, dl, OrInst, SPReg) + .addReg(RBReg) + .addReg(RBReg); + else + BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) + .addReg(RBReg) + .addImm(SPAdd); + + assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); + if (RBReg == FPReg) + BuildMI(MBB, MBBI, dl, OrInst, FPReg) + .addReg(ScratchReg) + .addReg(ScratchReg); + + // Now load the LR from the caller's stack frame. + if (MustSaveLR && !LoadedLR) + BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) + .addImm(LROffset) + .addReg(SPReg); + } if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 Index: llvm/trunk/test/CodeGen/PowerPC/ppc32-pic-large.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/ppc32-pic-large.ll +++ llvm/trunk/test/CodeGen/PowerPC/ppc32-pic-large.ll @@ -25,7 +25,7 @@ ; LARGE-BSS-DAG: lwz [[VREG:[0-9]+]], [[VREF:\.LC[0-9]+]]-.LTOC(30) ; LARGE-BSS-DAG: lwz {{[0-9]+}}, 0([[VREG]]) ; LARGE-BSS-DAG: stw {{[0-9]+}}, 8(1) -; LARGE-BSS: lwz 30, -8(1) +; LARGE-BSS: lwz 30, 24(1) ; LARGE-BSS: [[VREF]]: ; LARGE-BSS-NEXT: .p2align 2 ; LARGE-BSS-NEXT: .long bar Index: llvm/trunk/test/CodeGen/PowerPC/ppc32-pic.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/ppc32-pic.ll +++ llvm/trunk/test/CodeGen/PowerPC/ppc32-pic.ll @@ -21,4 +21,4 @@ ; SMALL-BSS-DAG: lwz [[VREG:[0-9]+]], bar@GOT(30) ; SMALL-BSS-DAG: lwz {{[0-9]+}}, 0([[VREG]]) ; SMALL-BSS: bl call_foo@PLT -; SMALL-BSS: lwz 30, -8(1) +; SMALL-BSS: lwz 30, 24(1) Index: llvm/trunk/test/CodeGen/PowerPC/stack-no-redzone.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/stack-no-redzone.ll +++ llvm/trunk/test/CodeGen/PowerPC/stack-no-redzone.ll @@ -0,0 +1,146 @@ +; Test that accesses of the stack remain within the range defined by R1, +; i.e. that loads and stores only access the allocated stack. This does not +; have to be the case when red zone is present. + +; Make sure that there is no red zone, i.e. ppc32 and SVR4 ABI. +; RUN: llc -mtriple=powerpc--freebsd-elf < %s | FileCheck %s + +; There are two ways that the stack pointer can be adjusted in the prologue: +; - by adding an immediate value: +; stwu r1, -imm(r1) +; - by adding another register: +; stwux r1, rx, r1 +; +; The restoring of the stack pointer can be done: +; - by adding an immediate value to it: +; addi r1, r1, imm +; - by copying the value from another register: +; mr r1, rx + + +; Nothing (no special features). +; +; CHECK-LABEL: test_n: +; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1) +; CHECK: stwu 1, -[[SIZE:[0-9]+]](1) +; CHECK: addi 1, 1, [[SIZE]] +; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1) +define i32 @test_n() local_unnamed_addr #0 { +entry: + %t0 = tail call i32 bitcast (i32 (...)* @bar0 to i32 ()*)() #0 + ret i32 %t0 +} + +; Aligned object on the stack. +; +; CHECK-LABEL: test_a: +; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1) +; CHECK: stwux 1, 1, {{[0-9]+}} +; CHECK: mr 1, {{[0-9]+}} +; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1) + +define i32 @test_a() local_unnamed_addr #0 { +entry: + %t0 = alloca i32, align 128 + %t1 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0 + ret i32 %t1 +} + +; Dynamic allocation on the stack. +; +; CHECK-LABEL: test_d: +; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1) +; CHECK: stwu 1, -[[SIZE:[0-9]+]](1) +; CHECK: mr 1, {{[0-9]+}} +; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1) +define i32 @test_d(i32 %p0) local_unnamed_addr #0 { + %t0 = alloca i32, i32 %p0, align 4 + %t1 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0 + ret i32 %t1 +} + +; Large stack (exceeds size of D-field). +; CHECK-LABEL: test_s: +; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1) +; CHECK: stwux 1, 1, {{[0-9]+}} +; CHECK: mr 1, {{[0-9]+}} +; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1) +define i32 @test_s(i32 %p0) local_unnamed_addr #0 { +entry: + %t0 = alloca [16384 x i32] + %t1 = getelementptr [16384 x i32], [16384 x i32]* %t0, i32 0, i32 0 + %t2 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t1) #0 + ret i32 %t2 +} + +; Combinations. + +; CHECK-LABEL: test_ad: +; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1) +; CHECK: stwux 1, 1, {{[0-9]+}} +; CHECK: mr 1, {{[0-9]+}} +; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1) +define i32 @test_ad(i32 %p0) local_unnamed_addr #0 { + %t0 = alloca i32, align 128 + %t1 = alloca i32, i32 %p0, align 4 + %t2 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0 + %t3 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t1) #0 + %t4 = add i32 %t2, %t3 + ret i32 %t4 +} + +; CHECK-LABEL: test_as: +; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1) +; CHECK: stwux 1, 1, {{[0-9]+}} +; CHECK: mr 1, {{[0-9]+}} +; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1) +define i32 @test_as() local_unnamed_addr #0 { + %t0 = alloca i32, align 128 + %t1 = alloca [16384 x i32] + %t2 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0 + %t3 = getelementptr [16384 x i32], [16384 x i32]* %t1, i32 0, i32 0 + %t4 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t3) #0 + %t5 = add i32 %t2, %t4 + ret i32 %t5 +} + +; CHECK-LABEL: test_ds: +; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1) +; CHECK: stwux 1, 1, {{[0-9]+}} +; CHECK: mr 1, {{[0-9]+}} +; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1) +define i32 @test_ds(i32 %p0) local_unnamed_addr #0 { + %t0 = alloca i32, i32 %p0, align 4 + %t1 = alloca [16384 x i32] + %t2 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0 + %t3 = getelementptr [16384 x i32], [16384 x i32]* %t1, i32 0, i32 0 + %t4 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t3) #0 + %t5 = add i32 %t2, %t4 + ret i32 %t5 +} + +; CHECK-LABEL: test_ads: +; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1) +; CHECK: stwux 1, 1, {{[0-9]+}} +; CHECK: mr 1, {{[0-9]+}} +; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1) +define i32 @test_ads(i32 %p0) local_unnamed_addr #0 { + %t0 = alloca i32, align 128 + %t1 = alloca i32, i32 %p0, align 4 + %t2 = alloca [16384 x i32] + + %t3 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0 + %t4 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t1) #0 + %t5 = add i32 %t3, %t4 + + %t6 = getelementptr [16384 x i32], [16384 x i32]* %t2, i32 0, i32 0 + %t7 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t6) #0 + %t8 = add i32 %t5, %t7 + ret i32 %t7 +} + + +declare i32 @bar0(...) local_unnamed_addr #0 +declare i32 @bar1(...) local_unnamed_addr #0 + +attributes #0 = { nounwind } Index: llvm/trunk/test/CodeGen/PowerPC/stack-realign.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/stack-realign.ll +++ llvm/trunk/test/CodeGen/PowerPC/stack-realign.ll @@ -83,18 +83,26 @@ ; CHECK-32-DAG: subfic 0, [[REG]], -64 ; CHECK-32: stwux 1, 1, 0 ; CHECK-32: subf 0, 0, 1 -; CHECK-32: stw 30, -8(0) -; CHECK-32: mr 30, 0 +; CHECK-32: addic 0, 0, -4 +; CHECK-32: stwx 31, 0, 0 +; CHECK-32: addic 0, 0, -4 +; CHECK-32: stwx 30, 0, 0 +; CHECK-32: addic 30, 0, 8 ; CHECK-32-PIC-LABEL: @goo ; CHECK-32-PIC-DAG: mflr [[LR:[0-9]+]] ; CHECK-32-PIC-DAG: clrlwi [[REG:[0-9]+]], 1, 27 ; CHECK-32-PIC-DAG: stw [[LR]], 4(1) ; CHECK-32-PIC-DAG: subfic 0, [[REG]], -64 -; CHECK-32-PIC: stwux 1, 1, 0 -; CHECK-32-PIC: subf 0, 0, 1 -; CHECK-32-PIC: stw 29, -12(0) -; CHECK-32-PIC-DAG: mr 29, 0 +; CHECK-32-PIC: stwux 1, 1, 0 +; CHECK-32-PIC: subf 0, 0, 1 +; CHECK-32-PIC: addic 0, 0, -4 +; CHECK-32-PIC: stwx 31, 0, 0 +; CHECK-32-PIC: addic 0, 0, -4 +; CHECK-32-PIC: stwx 30, 0, 0 +; CHECK-32-PIC: addic 0, 0, -4 +; CHECK-32-PIC: stwx 29, 0, 0 +; CHECK-32-PIC: addic 29, 0, 12 ; The large-frame-size case. define void @hoo(%struct.s* byval nocapture readonly %a) { @@ -138,9 +146,11 @@ ; CHECK-32-DAG: subfc 0, [[REG3]], [[REG2]] ; CHECK-32: stwux 1, 1, 0 ; CHECK-32: subf 0, 0, 1 -; CHECK-32-DAG: stw 31, -4(0) -; CHECK-32-DAG: stw 30, -8(0) -; CHECK-32: mr 30, 0 +; CHECK-32: addic 0, 0, -4 +; CHECK-32: stwx 31, 0, 0 +; CHECK-32: addic 0, 0, -4 +; CHECK-32: stwx 30, 0, 0 +; CHECK-32: addic 30, 0, 8 ; CHECK-32: blr @@ -152,10 +162,13 @@ ; CHECK-32-PIC-DAG: ori [[REG2:[0-9]+]], [[REG1]], 51904 ; CHECK-32-PIC-DAG: stw 0, 4(1) ; CHECK-32-PIC-DAG: subfc 0, [[REG3]], [[REG2]] -; CHECK-32-PIC: stwux 1, 1, 0 -; CHECK-32-PIC: stw 29, -12(0) -; CHECK-32-PIC: subf 0, 0, 1 -; CHECK-32-PIC: mr 29, 0 +; CHECK-32-PIC: stwux 1, 1, 0 +; CHECK-32-PIC: subf 0, 0, 1 +; CHECK-32-PIC: addic 0, 0, -4 +; CHECK-32-PIC: stwx 31, 0, 0 +; CHECK-32-PIC: addic 0, 0, -8 +; CHECK-32-PIC: stwx 29, 0, 0 +; CHECK-32-PIC: addic 29, 0, 12 ; CHECK-32: blr