Index: llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.h =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.h +++ llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.h @@ -72,12 +72,29 @@ */ void createTailCallBranchInstr(MachineBasicBlock &MBB) const; + /** + * Check if the conditions are correct to allow for the stack update + * to be moved past the CSR save/restore code. + */ + bool stackUpdateCanBeMoved(MachineFunction &MF) const; + public: PPCFrameLowering(const PPCSubtarget &STI); - unsigned determineFrameLayout(MachineFunction &MF, - bool UpdateMF = true, - bool UseEstimate = false) const; + /** + * Determine the frame layout and update the machine function. + */ + unsigned determineFrameLayoutAndUpdate(MachineFunction &MF, + bool UseEstimate = false) const; + + /** + * Determine the frame layout but do not update the machine function. + * The MachineFunction object can be const in this case as it is not + * modified. + */ + unsigned determineFrameLayout(const MachineFunction &MF, + bool UseEstimate = false, + unsigned *NewMaxCallFrameSize = nullptr) const; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. Index: llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -445,12 +445,26 @@ return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); } +/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum +/// call frame size. Update the MachineFunction object with the stack size. 
+unsigned +PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, + bool UseEstimate) const { + unsigned NewMaxCallFrameSize = 0; + unsigned FrameSize = determineFrameLayout(MF, UseEstimate, + &NewMaxCallFrameSize); + MF.getFrameInfo().setStackSize(FrameSize); + MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); + return FrameSize; +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. -unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, - bool UpdateMF, - bool UseEstimate) const { - MachineFrameInfo &MFI = MF.getFrameInfo(); +unsigned +PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, + bool UseEstimate, + unsigned *NewMaxCallFrameSize) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); // Get the number of bytes to allocate from the FrameInfo unsigned FrameSize = @@ -476,10 +490,7 @@ // Check whether we can skip adjusting the stack pointer (by using red zone) if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { - NumNoNeedForFrame++; // No need for frame - if (UpdateMF) - MFI.setStackSize(0); return 0; } @@ -495,9 +506,9 @@ if (MFI.hasVarSizedObjects()) maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; - // Update maximum call frame size. - if (UpdateMF) - MFI.setMaxCallFrameSize(maxCallFrameSize); + // Update the new max call frame size if the caller passes in a valid pointer. + if (NewMaxCallFrameSize) + *NewMaxCallFrameSize = maxCallFrameSize; // Include call frame size in total. FrameSize += maxCallFrameSize; @@ -505,10 +516,6 @@ // Make sure the frame is aligned. FrameSize = (FrameSize + AlignMask) & ~AlignMask; - // Update frame info. 
- if (UpdateMF) - MFI.setStackSize(FrameSize); - return FrameSize; } @@ -689,7 +696,7 @@ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); MachineFunction &MF = *(MBB->getParent()); bool HasBP = RegInfo->hasBasePointer(MF); - unsigned FrameSize = determineFrameLayout(MF, false); + unsigned FrameSize = determineFrameLayout(MF); int NegFrameSize = -FrameSize; bool IsLargeFrame = !isInt<16>(NegFrameSize); MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -712,6 +719,50 @@ return findScratchRegister(TmpMBB, true); } +bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { + const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + PPCFunctionInfo *FI = MF.getInfo(); + + // Abort if there is no register info or function info. + if (!RegInfo || !FI) + return false; + + // Only move the stack update on ELFv2 ABI and PPC64. + if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64()) + return false; + + // Check the frame size first and return false if it does not fit the + // requirements. + // We need a non-zero frame size as well as a frame that will fit in the red + // zone. This is because by moving the stack pointer update we are now storing + // to the red zone until the stack pointer is updated. If we get an interrupt + // inside the prologue but before the stack update we now have a number of + // stores to the red zone and those stores must all fit. + MachineFrameInfo &MFI = MF.getFrameInfo(); + unsigned FrameSize = MFI.getStackSize(); + if (!FrameSize || FrameSize > Subtarget.getRedZoneSize()) + return false; + + // Frame pointers and base pointers complicate matters so don't do anything + // if we have them. For example having a frame pointer will sometimes require + // a copy of r1 into r31 and that makes keeping track of updates to r1 more + // difficult. 
+ if (hasFP(MF) || RegInfo->hasBasePointer(MF)) + return false; + + // Calls to fast_cc functions use different rules for passing parameters on + // the stack from the ABI and using PIC base in the function imposes + // similar restrictions to using the base pointer. It is not generally safe + // to move the stack pointer update in these situations. + if (FI->hasFastCall() || FI->usesPICBase()) + return false; + + // Finally we can move the stack update if we do not require register + // scavenging. Register scavenging can introduce more spills and so + // may make the frame size larger than we have computed. + return !RegInfo->requiresFrameIndexScavenging(MF); +} + void PPCFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -747,7 +798,7 @@ MBBI = MBB.begin(); // Work out frame sizes. - unsigned FrameSize = determineFrameLayout(MF); + unsigned FrameSize = determineFrameLayoutAndUpdate(MF); int NegFrameSize = -FrameSize; if (!isInt<32>(NegFrameSize)) llvm_unreachable("Unhandled stack size!"); @@ -854,6 +905,45 @@ assert((isPPC64 || !MustSaveCR) && "Prologue CR saving supported only in 64-bit mode"); + // Check if we can move the stack update instruction (stdu) down the prologue + // past the callee saves. Hopefully this will avoid the situation where the + // saves are waiting for the update on the store with update to complete. + MachineBasicBlock::iterator StackUpdateLoc = MBBI; + bool MovingStackUpdateDown = false; + + // Check if we can move the stack update. + if (stackUpdateCanBeMoved(MF)) { + const std::vector &Info = MFI.getCalleeSavedInfo(); + for (CalleeSavedInfo CSI : Info) { + int FrIdx = CSI.getFrameIdx(); + // If the frame index is not negative the callee saved info belongs to a + // stack object that is not a fixed stack object. We ignore non-fixed + // stack objects because we won't move the stack pointer update past them. 
+ if (FrIdx >= 0) + continue; + + if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { + StackUpdateLoc++; + MovingStackUpdateDown = true; + } else { + // We need all of the Frame Indices to meet these conditions. + // If they do not, abort the whole operation. + StackUpdateLoc = MBBI; + MovingStackUpdateDown = false; + break; + } + } + + // If the operation was not aborted then update the object offset. + if (MovingStackUpdateDown) { + for (CalleeSavedInfo CSI : Info) { + int FrIdx = CSI.getFrameIdx(); + if (FrIdx < 0) + MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); + } + } + } + // If we need to spill the CR and the LR but we don't have two separate // registers available, we must spill them one at a time if (MustSaveCR && SingleScratchReg && MustSaveLR) { @@ -917,7 +1007,7 @@ } if (MustSaveLR) - BuildMI(MBB, MBBI, dl, StoreInst) + BuildMI(MBB, StackUpdateLoc, dl, StoreInst) .addReg(ScratchReg, getKillRegState(true)) .addImm(LROffset) .addReg(SPReg); @@ -985,7 +1075,7 @@ HasSTUX = true; } else if (!isLargeFrame) { - BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg) + BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) .addReg(SPReg) .addImm(NegFrameSize) .addReg(SPReg); @@ -1233,6 +1323,12 @@ .addCFIIndex(CFIRegister); } else { int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); + // We have changed the object offset above but we do not want to change + // the actual offsets in the CFI instruction so we have to undo the + // offset change here. + if (MovingStackUpdateDown) + Offset -= NegFrameSize; + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) @@ -1379,6 +1475,32 @@ unsigned RBReg = SPReg; unsigned SPAdd = 0; + // Check if we can move the stack update instruction up the epilogue + // past the callee saves. 
This will allow the move to LR instruction + // to be executed before the restores of the callee saves which means + // that the callee saves can hide the latency from the MTLR instruction. + MachineBasicBlock::iterator StackUpdateLoc = MBBI; + if (stackUpdateCanBeMoved(MF)) { + const std::vector & Info = MFI.getCalleeSavedInfo(); + for (CalleeSavedInfo CSI : Info) { + int FrIdx = CSI.getFrameIdx(); + // If the frame index is not negative the callee saved info belongs to a + // stack object that is not a fixed stack object. We ignore non-fixed + // stack objects because we won't move the update of the stack pointer + // past them. + if (FrIdx >= 0) + continue; + + if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) + StackUpdateLoc--; + else { + // Abort the operation as we can't update all CSR restores. + StackUpdateLoc = MBBI; + break; + } + } + } + if (FrameSize) { // In the prologue, the loaded (or persistent) stack pointer value is // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red @@ -1408,7 +1530,7 @@ } } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { if (HasRedZone) { - BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) + BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) .addReg(SPReg) .addImm(FrameSize); } else { @@ -1432,7 +1554,7 @@ .addReg(FPReg); RBReg = FPReg; } - BuildMI(MBB, MBBI, dl, LoadInst, RBReg) + BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) .addImm(0) .addReg(SPReg); } @@ -1465,7 +1587,7 @@ // a base register anyway, because it may happen to be R0. 
bool LoadedLR = false; if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { - BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) + BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) .addImm(LROffset+SPAdd) .addReg(RBReg); LoadedLR = true; @@ -1537,7 +1659,7 @@ .addReg(TempReg, getKillRegState(i == e-1)); if (MustSaveLR) - BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg); + BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); // Callee pop calling convention. Pop parameter/linkage area. Used for tail // call optimization @@ -1946,7 +2068,7 @@ // the 16-bit immediate. We don't know the complete frame size here // because we've not yet computed callee-saved register spills or the // needed alignment padding. - unsigned StackSize = determineFrameLayout(MF, false, true); + unsigned StackSize = determineFrameLayout(MF, true); MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { Index: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1065,6 +1065,10 @@ OpcodeIndex = SOK_Float8Spill; } else if (PPC::F4RCRegClass.contains(Reg)) { OpcodeIndex = SOK_Float4Spill; + } else if (PPC::SPERCRegClass.contains(Reg)) { + OpcodeIndex = SOK_SPESpill; + } else if (PPC::SPE4RCRegClass.contains(Reg)) { + OpcodeIndex = SOK_SPE4Spill; } else if (PPC::CRRCRegClass.contains(Reg)) { OpcodeIndex = SOK_CRSpill; } else if (PPC::CRBITRCRegClass.contains(Reg)) { @@ -1151,6 +1155,10 @@ OpcodeIndex = SOK_Float8Spill; } else if (PPC::F4RCRegClass.contains(Reg)) { OpcodeIndex = SOK_Float4Spill; + } else if (PPC::SPERCRegClass.contains(Reg)) { + OpcodeIndex = SOK_SPESpill; + } else if (PPC::SPE4RCRegClass.contains(Reg)) { + OpcodeIndex = SOK_SPE4Spill; } else if 
(PPC::CRRCRegClass.contains(Reg)) { OpcodeIndex = SOK_CRSpill; } else if (PPC::CRBITRCRegClass.contains(Reg)) { Index: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h +++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h @@ -89,9 +89,7 @@ return true; } - bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { - return true; - } + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override { return true; Index: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -70,6 +70,8 @@ "caller preserved registers can be LICM candidates"), cl::init(true), cl::Hidden); +static unsigned offsetMinAlignForOpcode(unsigned OpC); + PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM) : PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR, TM.isPPC64() ? 0 : 1, @@ -315,6 +317,51 @@ return Reserved; } +bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const { + const PPCSubtarget &Subtarget = MF.getSubtarget(); + const PPCInstrInfo *InstrInfo = Subtarget.getInstrInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const std::vector &Info = MFI.getCalleeSavedInfo(); + + // If the callee saved info is invalid we have to default to true for safety. + if (!MFI.isCalleeSavedInfoValid()) + return true; + + // We will require the use of X-Forms because the frame is larger than what + // can be represented in signed 16 bits that fit in the immediate of a D-Form. + // If we need an X-Form then we need a register to store the address offset. 
+ unsigned FrameSize = MFI.getStackSize(); + // Signed 16 bits means that the FrameSize cannot be more than 15 bits. + if (FrameSize & ~0x7FFF) + return true; + + // The callee saved info is valid so it can be traversed. + // Checking for registers that need saving that do not have load or store + // forms where the address offset is an immediate. + for (unsigned i = 0; i < Info.size(); i++) { + int FrIdx = Info[i].getFrameIdx(); + unsigned Reg = Info[i].getReg(); + + unsigned Opcode = InstrInfo->getStoreOpcodeForSpill(Reg); + if (!MFI.isFixedObjectIndex(FrIdx)) { + // This is not a fixed object. If it requires alignment then we may still + // need to use the XForm. + if (offsetMinAlignForOpcode(Opcode) > 1) + return true; + } + + // This is either: + // 1) A fixed frame index object which we know is aligned so + // as long as we have a valid DForm/DSForm/DQForm (non XForm) we don't + // need to consider the alignment here. + // 2) A non-fixed object but in that case we now know that the min required + // alignment is no more than 1 based on the previous check. + if (InstrInfo->isXFormMemOp(Opcode)) + return true; + } + return false; +} + bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const { assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); @@ -825,9 +872,7 @@ } // If the offset must be a multiple of some value, return what that value is. -static unsigned offsetMinAlign(const MachineInstr &MI) { - unsigned OpC = MI.getOpcode(); - +static unsigned offsetMinAlignForOpcode(unsigned OpC) { switch (OpC) { default: return 1; @@ -852,6 +897,12 @@ } } +// If the offset must be a multiple of some value, return what that value is. +static unsigned offsetMinAlign(const MachineInstr &MI) { + unsigned OpC = MI.getOpcode(); + return offsetMinAlignForOpcode(OpC); +} + // Return the OffsetOperandNo given the FIOperandNum (and the instruction). 
static unsigned getOffsetONFromFION(const MachineInstr &MI, unsigned FIOperandNum) { @@ -1080,7 +1131,7 @@ MachineBasicBlock &MBB = *MI->getParent(); MachineFunction &MF = *MBB.getParent(); const PPCFrameLowering *TFI = getFrameLowering(MF); - unsigned StackEst = TFI->determineFrameLayout(MF, false, true); + unsigned StackEst = TFI->determineFrameLayout(MF, true); // If we likely don't need a stack frame, then we probably don't need a // virtual base register either. Index: llvm/trunk/test/CodeGen/PowerPC/CSR-fit.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/CSR-fit.ll +++ llvm/trunk/test/CodeGen/PowerPC/CSR-fit.ll @@ -11,49 +11,49 @@ ; CHECK-PWR8-LABEL: caller1: ; CHECK-PWR8: # %bb.0: # %entry ; CHECK-PWR8-NEXT: mflr r0 -; CHECK-PWR8-NEXT: std r0, 16(r1) -; CHECK-PWR8-NEXT: stdu r1, -176(r1) ; CHECK-PWR8-NEXT: .cfi_def_cfa_offset 176 ; CHECK-PWR8-NEXT: .cfi_offset lr, 16 ; CHECK-PWR8-NEXT: .cfi_offset r14, -144 ; CHECK-PWR8-NEXT: .cfi_offset r15, -136 -; CHECK-PWR8-NEXT: std r14, 32(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: std r15, 40(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: std r14, -144(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: std r15, -136(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: std r0, 16(r1) +; CHECK-PWR8-NEXT: stdu r1, -176(r1) ; CHECK-PWR8-NEXT: #APP ; CHECK-PWR8-NEXT: add r3, r3, r4 ; CHECK-PWR8-NEXT: #NO_APP ; CHECK-PWR8-NEXT: extsw r3, r3 ; CHECK-PWR8-NEXT: bl callee ; CHECK-PWR8-NEXT: nop -; CHECK-PWR8-NEXT: ld r15, 40(r1) # 8-byte Folded Reload -; CHECK-PWR8-NEXT: ld r14, 32(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: addi r1, r1, 176 ; CHECK-PWR8-NEXT: ld r0, 16(r1) ; CHECK-PWR8-NEXT: mtlr r0 +; CHECK-PWR8-NEXT: ld r15, -136(r1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: ld r14, -144(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: blr ; ; CHECK-PWR9-LABEL: caller1: ; CHECK-PWR9: # %bb.0: # %entry ; CHECK-PWR9-NEXT: mflr r0 -; CHECK-PWR9-NEXT: std r0, 16(r1) -; 
CHECK-PWR9-NEXT: stdu r1, -176(r1) ; CHECK-PWR9-NEXT: .cfi_def_cfa_offset 176 ; CHECK-PWR9-NEXT: .cfi_offset lr, 16 ; CHECK-PWR9-NEXT: .cfi_offset r14, -144 ; CHECK-PWR9-NEXT: .cfi_offset r15, -136 -; CHECK-PWR9-NEXT: std r14, 32(r1) # 8-byte Folded Spill -; CHECK-PWR9-NEXT: std r15, 40(r1) # 8-byte Folded Spill +; CHECK-PWR9-NEXT: std r14, -144(r1) # 8-byte Folded Spill +; CHECK-PWR9-NEXT: std r15, -136(r1) # 8-byte Folded Spill +; CHECK-PWR9-NEXT: std r0, 16(r1) +; CHECK-PWR9-NEXT: stdu r1, -176(r1) ; CHECK-PWR9-NEXT: #APP ; CHECK-PWR9-NEXT: add r3, r3, r4 ; CHECK-PWR9-NEXT: #NO_APP ; CHECK-PWR9-NEXT: extsw r3, r3 ; CHECK-PWR9-NEXT: bl callee ; CHECK-PWR9-NEXT: nop -; CHECK-PWR9-NEXT: ld r15, 40(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: ld r14, 32(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: addi r1, r1, 176 ; CHECK-PWR9-NEXT: ld r0, 16(r1) ; CHECK-PWR9-NEXT: mtlr r0 +; CHECK-PWR9-NEXT: ld r15, -136(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r14, -144(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: blr entry: %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r14},~{r15}"(i32 %a, i32 %b) @@ -65,49 +65,49 @@ ; CHECK-PWR8-LABEL: caller2: ; CHECK-PWR8: # %bb.0: # %entry ; CHECK-PWR8-NEXT: mflr r0 -; CHECK-PWR8-NEXT: std r0, 16(r1) -; CHECK-PWR8-NEXT: stdu r1, -176(r1) ; CHECK-PWR8-NEXT: .cfi_def_cfa_offset 176 ; CHECK-PWR8-NEXT: .cfi_offset lr, 16 ; CHECK-PWR8-NEXT: .cfi_offset f14, -144 ; CHECK-PWR8-NEXT: .cfi_offset f15, -136 -; CHECK-PWR8-NEXT: stfd f14, 32(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: stfd f15, 40(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill +; CHECK-PWR8-NEXT: std r0, 16(r1) +; CHECK-PWR8-NEXT: stdu r1, -176(r1) ; CHECK-PWR8-NEXT: #APP ; CHECK-PWR8-NEXT: add r3, r3, r4 ; CHECK-PWR8-NEXT: #NO_APP ; CHECK-PWR8-NEXT: extsw r3, r3 ; CHECK-PWR8-NEXT: bl callee ; CHECK-PWR8-NEXT: nop -; CHECK-PWR8-NEXT: lfd f15, 40(r1) # 8-byte Folded 
Reload -; CHECK-PWR8-NEXT: lfd f14, 32(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: addi r1, r1, 176 ; CHECK-PWR8-NEXT: ld r0, 16(r1) ; CHECK-PWR8-NEXT: mtlr r0 +; CHECK-PWR8-NEXT: lfd f15, -136(r1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: lfd f14, -144(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: blr ; ; CHECK-PWR9-LABEL: caller2: ; CHECK-PWR9: # %bb.0: # %entry ; CHECK-PWR9-NEXT: mflr r0 -; CHECK-PWR9-NEXT: std r0, 16(r1) -; CHECK-PWR9-NEXT: stdu r1, -176(r1) ; CHECK-PWR9-NEXT: .cfi_def_cfa_offset 176 ; CHECK-PWR9-NEXT: .cfi_offset lr, 16 ; CHECK-PWR9-NEXT: .cfi_offset f14, -144 ; CHECK-PWR9-NEXT: .cfi_offset f15, -136 -; CHECK-PWR9-NEXT: stfd f14, 32(r1) # 8-byte Folded Spill -; CHECK-PWR9-NEXT: stfd f15, 40(r1) # 8-byte Folded Spill +; CHECK-PWR9-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill +; CHECK-PWR9-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill +; CHECK-PWR9-NEXT: std r0, 16(r1) +; CHECK-PWR9-NEXT: stdu r1, -176(r1) ; CHECK-PWR9-NEXT: #APP ; CHECK-PWR9-NEXT: add r3, r3, r4 ; CHECK-PWR9-NEXT: #NO_APP ; CHECK-PWR9-NEXT: extsw r3, r3 ; CHECK-PWR9-NEXT: bl callee ; CHECK-PWR9-NEXT: nop -; CHECK-PWR9-NEXT: lfd f15, 40(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f14, 32(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: addi r1, r1, 176 ; CHECK-PWR9-NEXT: ld r0, 16(r1) ; CHECK-PWR9-NEXT: mtlr r0 +; CHECK-PWR9-NEXT: lfd f15, -136(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f14, -144(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: blr entry: %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{f14},~{f15}"(i32 %a, i32 %b) Index: llvm/trunk/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll +++ llvm/trunk/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll @@ -15,13 +15,13 @@ define noalias i8* @_ZN2CC3funEv(%class.CC* %this) { ; CHECK-LABEL: _ZN2CC3funEv: ; CHECK: mflr 0 -; CHECK-NEXT: std 0, 16(1) -; CHECK-NEXT: stdu 1, 
-48(1) ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std 30, -16(1) +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -48(1) ; CHECK-NEXT: ld 12, 0(3) -; CHECK-NEXT: std 30, 32(1) ; CHECK-NEXT: mr 30, 3 ; CHECK-NEXT: std 2, 24(1) ; CHECK-NEXT: mtctr 12 @@ -38,11 +38,11 @@ ; CHECK-NEXT: mr 3, 30 ; CHECK-NEXT: bl _ZN2CC3barEPi ; CHECK-NEXT: nop -; CHECK: ld 30, 32(1) -; CHECK-NEXT: li 3, 0 +; CHECK: li 3, 0 ; CHECK-NEXT: addi 1, 1, 48 ; CHECK-NEXT: ld 0, 16(1) ; CHECK-NEXT: mtlr 0 +; CHECK: ld 30, -16(1) ; CHECK-NEXT: blr entry: %foo = getelementptr inbounds %class.CC, %class.CC* %this, i64 0, i32 0, i32 0 Index: llvm/trunk/test/CodeGen/PowerPC/not-fixed-frame-object.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/not-fixed-frame-object.ll +++ llvm/trunk/test/CodeGen/PowerPC/not-fixed-frame-object.ll @@ -6,8 +6,6 @@ ; CHECK-LABEL: caller: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: std r0, 16(r1) -; CHECK-NEXT: stdu r1, -192(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 192 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: .cfi_offset r14, -144 @@ -28,28 +26,30 @@ ; CHECK-NEXT: .cfi_offset r29, -24 ; CHECK-NEXT: .cfi_offset r30, -16 ; CHECK-NEXT: .cfi_offset r31, -8 +; CHECK-NEXT: std r14, -144(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r15, -136(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r16, -128(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r17, -120(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r18, -112(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r19, -104(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r20, -96(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r21, -88(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r22, -80(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r23, -72(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r24, -64(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill +; CHECK-NEXT: std 
r26, -48(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r31, -8(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -192(r1) ; CHECK-NEXT: std r5, 32(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r3, 40(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r14, 48(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r15, 56(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r16, 64(r1) # 8-byte Folded Spill ; CHECK-NEXT: mr r0, r4 ; CHECK-NEXT: ld r3, 40(r1) # 8-byte Folded Reload -; CHECK-NEXT: std r17, 72(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r18, 80(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r19, 88(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r20, 96(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r21, 104(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r22, 112(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r23, 120(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r24, 128(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r25, 136(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r26, 144(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r27, 152(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r28, 160(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r29, 168(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, 176(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r31, 184(r1) # 8-byte Folded Spill ; CHECK-NEXT: #APP ; CHECK-NEXT: add r3, r3, r0 ; CHECK-NEXT: #NO_APP @@ -59,27 +59,27 @@ ; CHECK-NEXT: mr r5, r0 ; CHECK-NEXT: bl callee ; CHECK-NEXT: nop -; CHECK-NEXT: ld r31, 184(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r30, 176(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, 168(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r28, 160(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r27, 152(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r26, 144(r1) # 8-byte Folded Reload -; 
CHECK-NEXT: ld r25, 136(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r24, 128(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r23, 120(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r22, 112(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r21, 104(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r20, 96(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r19, 88(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r18, 80(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r17, 72(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r16, 64(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r15, 56(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r14, 48(r1) # 8-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 192 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: ld r31, -8(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r25, -56(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r24, -64(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r23, -72(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r22, -80(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r21, -88(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r20, -96(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r19, -104(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r18, -112(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r17, -120(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r16, -128(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r15, -136(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r14, -144(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr entry: %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13}"(i32 %a, i32 %b) Index: 
llvm/trunk/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll +++ llvm/trunk/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll @@ -110,7 +110,7 @@ ; ; Epilogue code. ; CHECK: mtlr {{[0-9]+}} -; CHECK-NEXT: blr +; CHECK: blr ; ; ENABLE: .[[ELSE_LABEL]]: # %if.else ; Shift second argument by one and store into returned register. @@ -171,7 +171,7 @@ ; Next BB ; CHECK: %for.exit ; CHECK: mtlr {{[0-9]+}} -; CHECK-NEXT: blr +; CHECK: blr define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) { entry: br label %for.preheader @@ -209,9 +209,9 @@ ; Make sure we save the link register ; CHECK: mflr {{[0-9]+}} ; -; DISABLE: cmplwi 0, 3, 0 -; DISABLE-NEXT: std +; DISABLE: std ; DISABLE-NEXT: std +; DISABLE: cmplwi 0, 3, 0 ; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] ; ; Loop preheader @@ -240,7 +240,7 @@ ; DISABLE: .[[EPILOG_BB]]: # %if.end ; Epilog code ; CHECK: mtlr {{[0-9]+}} -; CHECK-NEXT: blr +; CHECK: blr ; ; ENABLE: .[[ELSE_LABEL]]: # %if.else ; Shift second argument by one and store into returned register. @@ -291,9 +291,9 @@ ; Make sure we save the link register ; CHECK: mflr {{[0-9]+}} ; -; DISABLE: cmplwi 0, 3, 0 -; DISABLE-NEXT: std +; DISABLE: std ; DISABLE-NEXT: std +; DISABLE: cmplwi 0, 3, 0 ; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] ; ; CHECK: bl somethingElse @@ -322,7 +322,7 @@ ; ; Epilogue code. ; CHECK: mtlr {{[0-9]+}} -; CHECK-NEXT: blr +; CHECK: blr ; ; ENABLE: .[[ELSE_LABEL]]: # %if.else ; Shift second argument by one and store into returned register. 
Index: llvm/trunk/test/CodeGen/PowerPC/tls_get_addr_clobbers.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/tls_get_addr_clobbers.ll +++ llvm/trunk/test/CodeGen/PowerPC/tls_get_addr_clobbers.ll @@ -6,7 +6,7 @@ entry: ; CHECK-LABEL: test_foo: -; CHECK: stdu 1, {{-?[0-9]+}}(1) +; CHECK-DAG: stdu 1, {{-?[0-9]+}}(1) ; CHECK-DAG: mr [[BACKUP_3:[0-9]+]], 3 ; CHECK-DAG: mr [[BACKUP_4:[0-9]+]], 4 ; CHECK-DAG: mr [[BACKUP_5:[0-9]+]], 5 @@ -15,14 +15,14 @@ ; CHECK-DAG: mr [[BACKUP_8:[0-9]+]], 8 ; CHECK-DAG: mr [[BACKUP_9:[0-9]+]], 9 ; CHECK-DAG: mr [[BACKUP_10:[0-9]+]], 10 -; CHECK-DAG: std [[BACKUP_3]], {{[0-9]+}}(1) -; CHECK-DAG: std [[BACKUP_4]], {{[0-9]+}}(1) -; CHECK-DAG: std [[BACKUP_5]], {{[0-9]+}}(1) -; CHECK-DAG: std [[BACKUP_6]], {{[0-9]+}}(1) -; CHECK-DAG: std [[BACKUP_7]], {{[0-9]+}}(1) -; CHECK-DAG: std [[BACKUP_8]], {{[0-9]+}}(1) -; CHECK-DAG: std [[BACKUP_9]], {{[0-9]+}}(1) -; CHECK-DAG: std [[BACKUP_10]], {{[0-9]+}}(1) +; CHECK-DAG: std [[BACKUP_3]], {{-?[0-9]+}}(1) +; CHECK-DAG: std [[BACKUP_4]], {{-?[0-9]+}}(1) +; CHECK-DAG: std [[BACKUP_5]], {{-?[0-9]+}}(1) +; CHECK-DAG: std [[BACKUP_6]], {{-?[0-9]+}}(1) +; CHECK-DAG: std [[BACKUP_7]], {{-?[0-9]+}}(1) +; CHECK-DAG: std [[BACKUP_8]], {{-?[0-9]+}}(1) +; CHECK-DAG: std [[BACKUP_9]], {{-?[0-9]+}}(1) +; CHECK-DAG: std [[BACKUP_10]], {{-?[0-9]+}}(1) ; CHECK: bl __tls_get_addr ; CHECK-DAG: stw 3, 0([[BACKUP_3]]) ; CHECK-DAG: stw 3, 0([[BACKUP_4]])