Index: llvm/include/llvm/CodeGen/TargetFrameLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -14,6 +14,7 @@ #define LLVM_CODEGEN_TARGETFRAMELOWERING_H #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/Support/TypeSize.h" #include @@ -389,7 +390,8 @@ /// By default, just maintain the original order. virtual void orderFrameObjects(const MachineFunction &MF, - SmallVectorImpl &objectsToAllocate) const { + SmallVectorImpl &objectsToAllocate, + const MachineBlockFrequencyInfo *MBFI) const { } /// Check whether or not the given \p MBB can be used as a prologue Index: llvm/lib/CodeGen/PrologEpilogInserter.cpp =================================================================== --- llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -121,6 +122,9 @@ // Emit remarks. MachineOptimizationRemarkEmitter *ORE = nullptr; + // Used by target for ordering of frame objects. + MachineBlockFrequencyInfo *MBFI = nullptr; + void calculateCallFrameInfo(MachineFunction &MF); void calculateSaveRestoreBlocks(MachineFunction &MF); void spillCalleeSavedRegs(MachineFunction &MF); @@ -159,6 +163,8 @@ AU.addPreserved(); AU.addPreserved(); AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -217,6 +223,7 @@ RS = TRI->requiresRegisterScavenging(MF) ? 
new RegScavenger() : nullptr; FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF); ORE = &getAnalysis().getORE(); + MBFI = &getAnalysis(); // Calculate the MaxCallFrameSize and AdjustsStack variables for the // function's frame information. Also eliminates call frame pseudo @@ -1052,7 +1059,7 @@ // Give the targets a chance to order the objects the way they like it. if (MF.getTarget().getOptLevel() != CodeGenOpt::None && MF.getTarget().Options.StackSymbolOrdering) - TFI.orderFrameObjects(MF, ObjectsToAllocate); + TFI.orderFrameObjects(MF, ObjectsToAllocate, MBFI); // Keep track of which bytes in the fixed and callee-save range are used so we // can use the holes when allocating later stack objects. Only do this if Index: llvm/lib/Target/AArch64/AArch64FrameLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -122,7 +122,8 @@ void orderFrameObjects(const MachineFunction &MF, - SmallVectorImpl &ObjectsToAllocate) const override; + SmallVectorImpl &ObjectsToAllocate, + const MachineBlockFrequencyInfo *MBFI) const override; private: /// Returns true if a homogeneous prolog or epilog code can be emitted Index: llvm/lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -3642,7 +3642,8 @@ } // namespace void AArch64FrameLowering::orderFrameObjects( - const MachineFunction &MF, SmallVectorImpl &ObjectsToAllocate) const { + const MachineFunction &MF, SmallVectorImpl &ObjectsToAllocate, + const MachineBlockFrequencyInfo *MBFI) const { if (!OrderFrameObjects || ObjectsToAllocate.empty()) return; Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZFrameLowering.h 
+++ llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -77,6 +77,9 @@
   bool hasFP(const MachineFunction &MF) const override;
   StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
                                      Register &FrameReg) const override;
+  void orderFrameObjects(const MachineFunction &MF,
+                         SmallVectorImpl<int> &ObjectsToAllocate,
+                         const MachineBlockFrequencyInfo *MBFI) const override;
 
   // Return the byte offset from the incoming stack pointer of Reg's
   // ABI-defined save slot. Return 0 if no slot is defined for Reg. Adjust
Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -18,9 +18,16 @@
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Target/TargetMachine.h"
+#ifndef NDEBUG
+#include <iomanip>
+#include <set>
+#include <sstream>
+#endif
 
 using namespace llvm;
 
+#define DEBUG_TYPE "prologepilog"
+
 namespace {
 // The ABI-defined register save slots, relative to the CFA (i.e.
 // incoming stack pointer + SystemZMC::ELFCallFrameSize).
@@ -80,6 +87,340 @@
   }
 }
 
+namespace {
+// Sorting record for one frame object, mirroring its MachineFrameInfo entry.
+struct SZFrameSortingObj {
+  bool IsValid = false;     // true if we care about this Object.
+  uint32_t ObjectIndex = 0; // Index of Object into MFI list.
+  uint64_t ObjectSize = 0;  // Size of Object in bytes.
+  uint32_t MaxOffs = 0;     // Max offset into object used.
+  Align ObjectAlign;        // Object alignment.
+  float D12Freq = 0;        // 12-bit displacement operands.
+  float DPairFreq = 0;      // Operands having both 12/20 bit variants.
+};
+
+typedef std::vector<SZFrameSortingObj> SZFrameObjVec;
+
+// Helper that reorders the sortable prefix [begin, IE) of SortingObjects.
+// BottomMargin is the estimated offset below the first sorted object.
+struct SZFrameSorter {
+  SZFrameObjVec &SortingObjects;
+  const uint64_t BottomMargin;
+  const uint64_t StackAlign;
+  SZFrameObjVec::iterator IE; // End iterator for all objects to be sorted.
+
+  SZFrameSorter(SZFrameObjVec &SOs, uint64_t BM, uint64_t SA) :
+    SortingObjects(SOs), BottomMargin(BM), StackAlign(SA) {}
+
+  // Prepare for sorting, return true if there is work to do.
+  bool prepare() {
+    // First put all invalid and variable sized objects at the end.
+    auto Cmp0 = [](const SZFrameSortingObj &A, const SZFrameSortingObj &B) {
+      if (!A.IsValid || !B.IsValid)
+        return A.IsValid;
+      if (!A.ObjectSize || !B.ObjectSize)
+        return A.ObjectSize > 0;
+      return false;
+    };
+    std::stable_sort(SortingObjects.begin(), SortingObjects.end(), Cmp0);
+
+    // Set IE to point after last object to be sorted.
+    IE = SortingObjects.begin();
+    for (; IE != SortingObjects.end(); IE++)
+      if (!IE->IsValid || !IE->ObjectSize)
+        break;
+
+    // Reverse the original order so that it is preserved after the final
+    // reversal later.
+    std::reverse(SortingObjects.begin(), IE);
+    return std::distance(SortingObjects.begin(), IE) >= 2;
+  }
+
+  // Return an iterator to one after the last object fully inside the given
+  // offset range. The size of each object is here a multiple of its
+  // alignment which means that after a second sorting by alignments no
+  // additional space will be needed.
+  SZFrameObjVec::iterator findRangeEnd(uint64_t Top) {
+    uint64_t RangeSize = BottomMargin;
+    SZFrameObjVec::iterator Itr = SortingObjects.begin();
+    for (; Itr != IE; Itr++) {
+      uint64_t ObjSpace = Itr->ObjectSize;
+      if (RangeSize + ObjSpace > Top)
+        break;
+      RangeSize += ObjSpace;
+      if (RangeSize == Top)
+        break;
+    }
+    return Itr;
+  }
+  SZFrameObjVec::iterator findU12End() { return findRangeEnd(maxUIntN(12)); }
+
+  // Function to sort by D12Freq densities, and then DPair secondarily,
+  // computed as (Freq / ObjectSize). The comparisons of two such fractions
+  // are refactored by multiplying both sides with A.ObjectSize *
+  // B.ObjectSize, in order to eliminate the (fp) divisions.
+  static bool CmpD12(const SZFrameSortingObj &A, const SZFrameSortingObj &B) {
+    // Compare the cross-multiplied densities themselves (not the raw
+    // frequencies) before falling back to the secondary key. Testing
+    // "A.D12Freq != B.D12Freq" instead makes objects with equal densities but
+    // different frequencies bypass the DPair tie-break, which breaks the
+    // strict weak ordering std::stable_sort requires.
+    const float D12A = A.D12Freq * B.ObjectSize;
+    const float D12B = B.D12Freq * A.ObjectSize;
+    if (D12A != D12B)
+      return D12A > D12B;
+    return A.DPairFreq * B.ObjectSize > B.DPairFreq * A.ObjectSize;
+  };
+
+  // Function to sort objects by alignment requirements.
+  static bool CmpAlign(const SZFrameSortingObj &A, const SZFrameSortingObj &B) {
+    return A.ObjectAlign > B.ObjectAlign;
+  };
+
+  void doIterativeSorting(const MachineFunction &MF) {
+    // No worries if all objects fit within the U12 offset limit.
+    if (findU12End() == IE) {
+      std::stable_sort(SortingObjects.begin(), IE, CmpAlign);
+      LLVM_DEBUG(dumpOrder("All within U12, sorted by alignments."););
+      return;
+    }
+
+    // Sort all objects.
+    std::stable_sort(SortingObjects.begin(), IE, CmpD12);
+    LLVM_DEBUG(dumpOrder("Sorted by D12 densities "););
+
+    // Find the lowest offset U12End must have in order for its max offset to
+    // be in range.
+    SZFrameObjVec::iterator U12End = findU12End();
+    uint64_t TopUsed = U12End->MaxOffs < maxUIntN(12) ?
+      maxUIntN(12) - (U12End->MaxOffs + 1) : 0;
+    uint64_t TopUA = alignTo(TopUsed, U12End->ObjectAlign.value());
+    TopUsed = TopUA != TopUsed ? TopUA - U12End->ObjectAlign.value() : TopUsed;
+
+    // Find the current offset.
+    SZFrameObjVec::iterator CurrentPos = SortingObjects.begin();
+    uint64_t CurrentOffset = BottomMargin;
+    for (; CurrentPos != U12End; CurrentPos++)
+      CurrentOffset += CurrentPos->ObjectSize;
+
+    // Pack if possible smaller objects before the last (big) one inside U12.
+    for (++CurrentPos;
+         TopUsed > 0 && CurrentOffset < TopUsed - 1 && CurrentPos != IE;
+         CurrentPos++)
+      if (CurrentOffset + CurrentPos->ObjectSize < TopUsed) {
+        CurrentOffset += CurrentPos->ObjectSize;
+        std::iter_swap(U12End, CurrentPos);
+        if (std::distance(U12End, CurrentPos) > 1)
+          std::iter_swap(std::next(U12End), CurrentPos);
+        U12End++;
+      }
+    assert(U12End == findU12End() && "U12End iterator broken.");
+    LLVM_DEBUG(dumpOrder("After packing below U12 "););
+
+    // At last, sort the regions by alignments.
+    std::stable_sort(SortingObjects.begin(), U12End, CmpAlign);
+    if (CurrentOffset <= maxUIntN(12) && (U12End->D12Freq || U12End->DPairFreq))
+      U12End++;
+    std::stable_sort(U12End, IE, CmpAlign);
+    LLVM_DEBUG(dumpOrder("After sorting by alignments"););
+  }
+
+#ifndef NDEBUG
+  void recordExpectedRanges(const MachineFrameInfo &MFI,
+                            SystemZMachineFunctionInfo *ZFI) {
+    // Record which objects we expect to be within range for later assertion in
+    // eliminateFrameIndex() when the actual offset is known.
+    uint64_t CurrentOffset = BottomMargin;
+    SZFrameObjVec::iterator CurrentPos = SortingObjects.begin();
+    SZFrameObjVec::iterator U12End = findU12End();
+    for (; CurrentPos != U12End; CurrentPos++) {
+      assert(CurrentOffset == alignTo(CurrentOffset, CurrentPos->ObjectAlign)
+             && "Expected lower range to be sorted by alignments");
+      CurrentOffset += CurrentPos->ObjectSize;
+    }
+    if (U12End != IE && CurrentOffset + U12End->MaxOffs <= maxUIntN(12)) {
+      ZFI->FullyInsideU12ObjectIdx = U12End->ObjectIndex;
+      ZFI->FullInsideU12MaxOffs = U12End->MaxOffs;
+    }
+    else if (U12End != SortingObjects.begin())
+      ZFI->FullyInsideU12ObjectIdx = std::prev(U12End)->ObjectIndex;
+
+    // Record the estimated stack size. Include the extra space for any
+    // callee saved regs outside the reg save area.
+    int64_t Extra = 0;
+    for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) {
+      int64_t FixedObjOffs = MFI.getObjectOffset(i);
+      Extra = std::max(Extra, -(FixedObjOffs + SystemZMC::ELFCallFrameSize));
+    }
+    for (; CurrentPos != IE; CurrentPos++) {
+      CurrentOffset = alignTo(CurrentOffset, CurrentPos->ObjectAlign);
+      CurrentOffset += CurrentPos->ObjectSize;
+    }
+    ZFI->EstimatedStackSize = CurrentOffset + Extra;
+  }
+
+  void dumpOrder(std::string Msg) {
+    dbgs() << Msg << " = { ";
+    for (unsigned I = 0; I < std::distance(SortingObjects.begin(), IE); I++)
+      dbgs () << SortingObjects[I].ObjectIndex << ", ";
+    dbgs() << "}\n";
+  };
+
+  void dumpResult(StringRef F) {
+    dbgs() << "++ Reordered SystemZ frame objects for function " << F << " ++\n";
+    dbgs() << "Estim offs FI Align Size Density D12Freq "
+           << "DPairFr TotFreq\n";
+    uint64_t Offset = BottomMargin;
+    std::vector<uint64_t> ObjSpaces(SortingObjects.size());
+    unsigned ObjIdx = std::distance(SortingObjects.begin(), IE) - 1;
+    do {
+      SZFrameSortingObj &Obj = SortingObjects[ObjIdx];
+      uint64_t S = alignTo(Offset, Obj.ObjectAlign) - Offset + Obj.ObjectSize;
+      ObjSpaces[ObjIdx] = S;
+      Offset += S;
+    } while (ObjIdx--);
+
+    for (unsigned ObjIdx = 0; ObjIdx < SortingObjects.size(); ObjIdx++) {
+      SZFrameSortingObj &Obj = SortingObjects[ObjIdx];
+      if (!Obj.IsValid)
+        break;
+      Offset -= Obj.ObjectSize;
+      std::stringstream ObjStrS;
+      ObjStrS << std::setw(10) << Offset << " "
+              << std::setw(4) << Obj.ObjectIndex << " ";
+      float TotFreq = Obj.D12Freq + Obj.DPairFreq;
+      // Variable sized objects have size 0; avoid dividing by it.
+      float P = Obj.ObjectSize ? TotFreq / Obj.ObjectSize : 0;
+      uint32_t AlignSpace = ObjSpaces[ObjIdx] - Obj.ObjectSize;
+      if (Obj.ObjectSize)
+        ObjStrS << std::setw(2) << Obj.ObjectAlign.value()
+                << (AlignSpace ? "_ " : " ")
+                << std::setw(8) << Obj.ObjectSize << " "
+                << std::setw(8) << std::setprecision(3) << std::left << P << " "
+                << std::right
+                << std::setw(8) << std::setprecision(3) << Obj.D12Freq << " "
+                << std::setw(8) << std::setprecision(3) << Obj.DPairFreq << " "
+                << std::setw(8) << std::setprecision(3) << TotFreq;
+      else
+        ObjStrS << " var";
+      dbgs() << ObjStrS.str() << "\n";
+      Offset -= ObjSpaces[ObjIdx] - Obj.ObjectSize;
+    }
+    assert(Offset == BottomMargin);
+    dbgs() << "\n";
+  }
+#endif
+};
+
+} // namespace
+
+// Reorder ObjectsToAllocate so that the objects most frequently accessed by
+// instructions that only have a 12-bit unsigned displacement are placed
+// lowest on the stack. Block frequencies from MBFI weight each access.
+// Variable sized objects keep their relative order after the sorted ones.
+// TODO: Move to base class.
+void SystemZELFFrameLowering::orderFrameObjects(const MachineFunction &MF,
+                               SmallVectorImpl<int> &ObjectsToAllocate,
+                               const MachineBlockFrequencyInfo *MBFI) const {
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const SystemZInstrInfo *TII =
+      static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+  // Make a vector of sorting objects to mirror all MFI objects and mark
+  // those to be sorted as valid.
+  if (ObjectsToAllocate.size() <= 1)
+    return;
+  SZFrameObjVec SortingObjects(MFI.getObjectIndexEnd());
+  for (auto &Obj : ObjectsToAllocate) {
+    SortingObjects[Obj].IsValid = true;
+    SortingObjects[Obj].ObjectIndex = Obj;
+    // Simplify by rounding the size up to nearest multiple of the alignment.
+    Align OA = MFI.getObjectAlign(Obj);
+    SortingObjects[Obj].ObjectSize = alignTo(MFI.getObjectSize(Obj), OA);
+    SortingObjects[Obj].ObjectAlign = OA;
+  }
+
+  // Find the offset below the objects to be sorted, which includes the reg
+  // save area and maximum call frame size and any other objects such as reg
+  // scavenger slots. Also account for a final SP alignment.
+  const uint64_t StackAlign = getStackAlign().value();
+  uint64_t BottomMargin =
+    SystemZMC::ELFCallFrameSize + MFI.getMaxCallFrameSize() + StackAlign;
+  for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i)
+    if (!SortingObjects[i].IsValid && !MFI.isDeadObjectIndex(i))
+      BottomMargin += alignTo(MFI.getObjectSize(i), StackAlign);
+
+  // Examine uses for each interesting object and record displacement
+  // types. A use instruction can have either a 12-bit or 20-bit displacement
+  // operand, or both ("pair"). Also record the max offset used per object.
+  for (auto &MBB : MF) {
+    // The block frequency is invariant across the instructions of a block.
+    const float MBBFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
+    for (auto &MI : MBB) {
+      if (MI.isDebugInstr())
+        continue;
+      for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+        const MachineOperand &MO = MI.getOperand(I);
+        if (!MO.isFI())
+          continue;
+        int Index = MO.getIndex();
+        if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
+            SortingObjects[Index].IsValid) {
+          int64_t Offs = MI.getOperand(I + 1).getImm();
+          if (Offs >= 0 && Offs <= int32_t(maxUIntN(12))) {
+            if (uint32_t(Offs) > SortingObjects[Index].MaxOffs)
+              SortingObjects[Index].MaxOffs = uint32_t(Offs);
+            if (TII->hasDisplacementPairInsn(MI.getOpcode()))
+              SortingObjects[Index].DPairFreq += MBBFreq;
+            else if (!(MI.getDesc().TSFlags & SystemZII::Has20BitOffset))
+              SortingObjects[Index].D12Freq += MBBFreq;
+          }
+        }
+      }
+    }
+  }
+
+  SZFrameSorter FS(SortingObjects, BottomMargin, StackAlign);
+  if (!FS.prepare())
+    return;
+  FS.doIterativeSorting(MF);
+
+#ifndef NDEBUG
+  SystemZMachineFunctionInfo *ZFI = const_cast<SystemZMachineFunctionInfo *>
+    (MF.getInfo<SystemZMachineFunctionInfo>());
+  FS.recordExpectedRanges(MFI, ZFI);
+#endif
+
+  // A higher density object needs to go after in the list in order for it to
+  // end up lower on the stack. Reverse the sorted elements and any var-sized
+  // objects will follow them.
+  std::reverse(SortingObjects.begin(), FS.IE);
+
+  // Now modify the original list to represent the final order that
+  // we want.
+  unsigned Idx = 0;
+  for (auto &Obj : SortingObjects) {
+    // All invalid items are sorted at the end, so it's safe to stop.
+    if (!Obj.IsValid)
+      break;
+    ObjectsToAllocate[Idx++] = Obj.ObjectIndex;
+  }
+
+#ifndef NDEBUG
+  // Sanity check the result.
+  assert(Idx == ObjectsToAllocate.size() && "Objects sorting havoc!");
+  while (Idx < SortingObjects.size())
+    assert(!SortingObjects[Idx++].IsValid && "Objects sorting havoc!");
+  std::set<int> IndexSet;
+  for (auto &Obj : ObjectsToAllocate) {
+    bool UniqueIdx = IndexSet.insert(Obj).second;
+    assert(UniqueIdx && "Objects sorting havoc!");
+  }
+#endif
+
+  LLVM_DEBUG(FS.dumpResult(MF.getFunction().getName()));
+}
+
 bool SystemZFrameLowering::hasReservedCallFrame(
     const MachineFunction &MF) const {
   // The ELF ABI requires us to allocate 160 bytes of stack space for the
@@ -385,7 +726,7 @@
     // We may need register scavenging slots if some parts of the frame
     // are outside the reach of an unsigned 12-bit displacement.
     // Create 2 for the case where both addresses in an MVC are
-    // out of range.
+    // out of range. TODO: possible to improve (see orderFrameObjects)?
     RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, Align(8), false));
     RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, Align(8), false));
   }
@@ -531,6 +872,12 @@
   MFFrame.setStackSize(StackSize);
 
   if (StackSize) {
+#ifndef NDEBUG
+    SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+    assert(StackSize <= ZFI->EstimatedStackSize &&
+           "Stack size underestimated in orderFrameObjects().");
+#endif
+
     // Allocate StackSize bytes.
     int64_t Delta = -int64_t(StackSize);
     const unsigned ProbeSize = TLI.getStackProbeSize(MF);
Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -312,6 +312,9 @@
   // exists.
unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const; + // Return true if Opcode has a mapping in 12 <-> 20 bit displacements. + bool hasDisplacementPairInsn(unsigned Opcode) const; + // If Opcode is a load instruction that has a LOAD AND TEST form, // return the opcode for the testing form, otherwise return 0. unsigned getLoadAndTest(unsigned Opcode) const; Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -1652,6 +1652,13 @@ return 0; } +bool SystemZInstrInfo::hasDisplacementPairInsn(unsigned Opcode) const { + const MCInstrDesc &MCID = get(Opcode); + if (MCID.TSFlags & SystemZII::Has20BitOffset) + return SystemZ::getDisp12Opcode(Opcode) >= 0; + return SystemZ::getDisp20Opcode(Opcode) >= 0; +} + unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const { switch (Opcode) { case SystemZ::L: return SystemZ::LT; Index: llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -38,10 +38,23 @@ unsigned NumLocalDynamics; public: +#ifndef NDEBUG + // These are set by orderFrameObjects() and checked in other places. + int32_t FullyInsideU12ObjectIdx; // Expected to be within U12 offset range. + uint32_t FullInsideU12MaxOffs; // Check only users with MI-offset <= this. + uint64_t EstimatedStackSize; // Expected size >= final stack size. 
+#endif + explicit SystemZMachineFunctionInfo(MachineFunction &MF) : VarArgsFirstGPR(0), VarArgsFirstFPR(0), VarArgsFrameIndex(0), RegSaveFrameIndex(0), FramePointerSaveIndex(0), ManipulatesSP(false), - NumLocalDynamics(0) {} + NumLocalDynamics(0) { +#ifndef NDEBUG + FullyInsideU12ObjectIdx = INT32_MIN; + FullInsideU12MaxOffs = ~0U; + EstimatedStackSize = ~0UL; +#endif + } // Get and set the first and last call-saved GPR that should be saved by // this function and the SP offset for the STMG. These are 0 if no GPRs Index: llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -298,9 +298,16 @@ // Decompose the frame index into a base and offset. int FrameIndex = MI->getOperand(FIOperandNum).getIndex(); Register BasePtr; + int64_t MIOffs = MI->getOperand(FIOperandNum + 1).getImm(); int64_t Offset = - (TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed() + - MI->getOperand(FIOperandNum + 1).getImm()); + (TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed() + MIOffs); + +#ifndef NDEBUG + SystemZMachineFunctionInfo *ZFI = MF.getInfo(); + assert((FrameIndex != ZFI->FullyInsideU12ObjectIdx || + uint64_t(MIOffs) > ZFI->FullInsideU12MaxOffs || + Offset <= int(maxUIntN(12))) && "Unexpected offset out of U12 range!"); +#endif // Special handling of dbg_value instructions. if (MI->isDebugValue()) { Index: llvm/lib/Target/X86/X86FrameLowering.h =================================================================== --- llvm/lib/Target/X86/X86FrameLowering.h +++ llvm/lib/Target/X86/X86FrameLowering.h @@ -173,7 +173,8 @@ /// The heuristic we use is to try and pack them according to static number /// of uses and size in order to minimize code size. 
void orderFrameObjects(const MachineFunction &MF, - SmallVectorImpl &ObjectsToAllocate) const override; + SmallVectorImpl &ObjectsToAllocate, + const MachineBlockFrequencyInfo *MBFI) const override; /// Wraps up getting a CFI index and building a MachineInstr for it. void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Index: llvm/lib/Target/X86/X86FrameLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86FrameLowering.cpp +++ llvm/lib/Target/X86/X86FrameLowering.cpp @@ -3576,7 +3576,8 @@ // The heuristic we use is to try and pack them according to static number // of uses and size of object in order to minimize code size. void X86FrameLowering::orderFrameObjects( - const MachineFunction &MF, SmallVectorImpl &ObjectsToAllocate) const { + const MachineFunction &MF, SmallVectorImpl &ObjectsToAllocate, + const MachineBlockFrequencyInfo *MBFI) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); // Don't waste time if there's nothing to do. 
Index: llvm/test/CodeGen/AArch64/O0-pipeline.ll =================================================================== --- llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -50,6 +50,9 @@ ; CHECK-NEXT: Fixup Statepoint Caller Saved ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter +; CHECK-NEXT: MachineDominator Tree Construction +; CHECK-NEXT: Machine Natural Loop Construction +; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization ; CHECK-NEXT: Post-RA pseudo instruction expansion pass ; CHECK-NEXT: AArch64 pseudo instruction expansion pass Index: llvm/test/CodeGen/AMDGPU/llc-pipeline.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -123,6 +123,9 @@ ; GCN-O0-NEXT: Fixup Statepoint Caller Saved ; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis ; GCN-O0-NEXT: Machine Optimization Remark Emitter +; GCN-O0-NEXT: MachineDominator Tree Construction +; GCN-O0-NEXT: Machine Natural Loop Construction +; GCN-O0-NEXT: Machine Block Frequency Analysis ; GCN-O0-NEXT: Prologue/Epilogue Insertion & Frame Finalization ; GCN-O0-NEXT: Post-RA pseudo instruction expansion pass ; GCN-O0-NEXT: SI post-RA bundler Index: llvm/test/CodeGen/SystemZ/foldmemop-vec-binops.mir =================================================================== --- llvm/test/CodeGen/SystemZ/foldmemop-vec-binops.mir +++ llvm/test/CodeGen/SystemZ/foldmemop-vec-binops.mir @@ -133,7 +133,7 @@ # CHECK-LABEL: fun3: -# CHECK: aeb %f0, 164(%r15) # 4-byte Folded Reload +# CHECK: aeb %f0, 172(%r15) # 4-byte Folded Reload --- name: fun3 alignment: 16 @@ -166,7 +166,7 @@ # CHECK-LABEL: fun4: -# CHECK: aeb %f0, 164(%r15) # 4-byte Folded Reload +# CHECK: aeb %f0, 172(%r15) # 4-byte Folded Reload --- name: fun4 alignment: 16 @@ -332,7 +332,7 @@ # CHECK-LABEL: fun9: -# 
CHECK: seb %f0, 164(%r15) # 4-byte Folded Reload +# CHECK: seb %f0, 172(%r15) # 4-byte Folded Reload --- name: fun9 alignment: 16 @@ -530,7 +530,7 @@ # CHECK-LABEL: fun15: -# CHECK: deb %f0, 164(%r15) # 4-byte Folded Reload +# CHECK: deb %f0, 172(%r15) # 4-byte Folded Reload --- name: fun15 alignment: 16 @@ -729,7 +729,7 @@ # CHECK-LABEL: fun21: -# CHECK: meeb %f0, 164(%r15) # 4-byte Folded Reload +# CHECK: meeb %f0, 172(%r15) # 4-byte Folded Reload --- name: fun21 alignment: 16 @@ -762,7 +762,7 @@ # CHECK-LABEL: fun22: -# CHECK: meeb %f0, 164(%r15) # 4-byte Folded Reload +# CHECK: meeb %f0, 172(%r15) # 4-byte Folded Reload --- name: fun22 alignment: 16 Index: llvm/test/CodeGen/SystemZ/foldmemop-vec-cmp.mir =================================================================== --- llvm/test/CodeGen/SystemZ/foldmemop-vec-cmp.mir +++ llvm/test/CodeGen/SystemZ/foldmemop-vec-cmp.mir @@ -131,7 +131,7 @@ # CHECK-LABEL: fun3: -# CHECK: ceb %f0, 164(%r15) # 4-byte Folded Reload +# CHECK: ceb %f0, 172(%r15) # 4-byte Folded Reload --- name: fun3 alignment: 16 @@ -167,7 +167,7 @@ # CHECK-LABEL: fun4: -# CHECK: ceb %f0, 164(%r15) # 4-byte Folded Reload +# CHECK: ceb %f0, 172(%r15) # 4-byte Folded Reload --- name: fun4 alignment: 16 @@ -204,7 +204,7 @@ # CEB can't be used if one operand is a VR32 (and not FP32). # CHECK-LABEL: fun5: -# CHECK: lde %f0, 164(%r15) # 4-byte Folded Reload +# CHECK: lde %f0, 172(%r15) # 4-byte Folded Reload # CHECK-NEXT: wfcsb %v16, %f0 --- name: fun5 @@ -351,7 +351,7 @@ # CHECK-LABEL: fun9: -# CHECK: keb %f0, 164(%r15) # 4-byte Folded Reload +# CHECK: keb %f0, 172(%r15) # 4-byte Folded Reload --- name: fun9 alignment: 16 @@ -387,7 +387,7 @@ # CHECK-LABEL: fun10: -# CHECK: keb %f0, 164(%r15) # 4-byte Folded Reload +# CHECK: keb %f0, 172(%r15) # 4-byte Folded Reload --- name: fun10 alignment: 16 @@ -424,7 +424,7 @@ # CEB can't be used if one operand is a VR32 (and not FP32). 
# CHECK-LABEL: fun11: -# CHECK: lde %f0, 164(%r15) # 4-byte Folded Reload +# CHECK: lde %f0, 172(%r15) # 4-byte Folded Reload # CHECK-NEXT: wfksb %v16, %f0 --- name: fun11 @@ -463,7 +463,7 @@ # This test case involves a 128 bit operand (VGMF generating float 1.0). # CHECK-LABEL: fun12: # CHECK: vgmf %v0, 2, 8 -# CHECK-NEXT: ceb %f0, 164(%r15) # 4-byte Folded Reload +# CHECK-NEXT: ceb %f0, 172(%r15) # 4-byte Folded Reload --- name: fun12 alignment: 16 Index: llvm/test/CodeGen/SystemZ/foldmemop-vec-fusedfp.mir =================================================================== --- llvm/test/CodeGen/SystemZ/foldmemop-vec-fusedfp.mir +++ llvm/test/CodeGen/SystemZ/foldmemop-vec-fusedfp.mir @@ -165,7 +165,7 @@ # CHECK-LABEL: fun4: -# CHECK: maeb %f0, %f1, 160(%r15) # 4-byte Folded Reload +# CHECK: maeb %f0, %f1, 168(%r15) # 4-byte Folded Reload --- name: fun4 alignment: 16 @@ -200,7 +200,7 @@ # CHECK-LABEL: fun5: -# CHECK: maeb %f0, %f1, 160(%r15) # 4-byte Folded Reload +# CHECK: maeb %f0, %f1, 168(%r15) # 4-byte Folded Reload --- name: fun5 alignment: 16 @@ -448,7 +448,7 @@ # CHECK-LABEL: fun12: -# CHECK: mseb %f0, %f1, 160(%r15) # 4-byte Folded Reload +# CHECK: mseb %f0, %f1, 168(%r15) # 4-byte Folded Reload --- name: fun12 alignment: 16 @@ -483,7 +483,7 @@ # CHECK-LABEL: fun13: -# CHECK: mseb %f0, %f1, 160(%r15) # 4-byte Folded Reload +# CHECK: mseb %f0, %f1, 168(%r15) # 4-byte Folded Reload --- name: fun13 alignment: 16 Index: llvm/test/CodeGen/SystemZ/frame-27.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/frame-27.mir @@ -0,0 +1,421 @@ +# RUN: llc -mtriple=s390x-linux-gnu -start-before=prologepilog %s -o - -mcpu=z14 \ +# RUN: -debug-only=prologepilog -print-after=prologepilog -verify-machineinstrs 2>&1 \ +# RUN: | FileCheck %s +# REQUIRES: asserts +# +# Test that stack objects are ordered in a good way with respect to the +# displacement operands of users. 
+ +--- | + define void @f1() { ret void } + define void @f2() { ret void } + define void @f3() { ret void } + define void @f4() { ret void } + define void @f5() { ret void } + define void @f6() { ret void } + define void @f7() { ret void } + define void @f8() { ret void } + define void @f9() { ret void } + define void @f10() { ret void } + define void @f11() { ret void } + define void @f12() { ret void } + +... + +### Test that %stack.0 is placed close to its D12 user. +# CHECK: After sorting by alignments = { 0, 1, } +# CHECK-NEXT: Reordered SystemZ frame objects for function f1 +# CHECK-NOT: LAY +# CHECK: VL32 +--- +name: f1 +tracksRegLiveness: true +stack: + - { id: 0, size: 16 } + - { id: 1, size: 4095 } +machineFunctionInfo: {} +body: | + bb.0: + renamable $f0s = VL32 %stack.0, 0, $noreg + Return + +... + +### Test that %stack.1 is placed close to its D12 user. +# CHECK: After sorting by alignments = { 1, 0, } +# CHECK-NEXT: Reordered SystemZ frame objects for function f2 +# CHECK-NOT: LAY +# CHECK: VL32 +--- +name: f2 +tracksRegLiveness: true +stack: + - { id: 0, size: 16 } + - { id: 1, size: 4095 } +machineFunctionInfo: {} +body: | + bb.0: + renamable $f0s = VL32 %stack.1, 3916, $noreg + Return + +... + +# Test that the single store inside the loop is prioritized over the two +# stores after the loop. 
+# CHECK: After sorting by alignments = { 0, 1, } +# CHECK-NEXT: Reordered SystemZ frame objects for function f3 +# CHECK-NEXT: Estim offs FI Align Size Density D12Freq DPairFr TotFreq +# CHECK-NEXT: 4279 1 1 4095 0.000488 2 0 2 +# CHECK-NEXT: 184 0 1 4095 0.00778 31.9 0 31.9 + +# CHECK-LABEL: bb.1: +# CHECK-NOT: LAY +# CHECK: VST32 +# CHECK-LABEL: bb.2: +# CHECK: LAY +# CHECK-NEXT: VST32 +# CHECK-NEXT: LAY +# CHECK-NEXT: VST32 +--- +name: f3 +tracksRegLiveness: true +stack: + - { id: 0, size: 4095 } + - { id: 1, size: 4095 } +machineFunctionInfo: {} +body: | + bb.0: + liveins: $f0s, $f2s + + renamable $r0d = LGHI 100 + + bb.1: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $f0s, $f2s, $r0d + + VST32 renamable $f0s, %stack.0, 0, $noreg + renamable $r0d = nsw AGHIK killed renamable $r0d, -1, implicit-def dead $cc + CGHI renamable $r0d, 0, implicit-def $cc + BRC 14, 6, %bb.1, implicit killed $cc + J %bb.2 + + bb.2: + liveins: $f2s + + VST32 renamable $f2s, %stack.1, 0, $noreg + VST32 killed renamable $f2s, %stack.1, 0, $noreg + Return + +... + +### Swap the order of the objects so that both accesses are in range. +# CHECK: After sorting by alignments = { 0, 1, } +# CHECK-NEXT: Reordered SystemZ frame objects for function f4 +# CHECK-NEXT: Estim offs FI Align Size Density D12Freq DPairFr TotFreq +# CHECK-NEXT: 4279 1 1 8190 0.000122 0 1 1 +# CHECK-NEXT: 184 0 1 4095 0.000244 1 0 1 +# CHECK-NOT: LAY +# CHECK: VL32 +# CHECK-NOT: LAY +# CHECK: LEY +--- +name: f4 +tracksRegLiveness: true +stack: + - { id: 0, size: 4095 } + - { id: 1, size: 8190 } +machineFunctionInfo: {} +body: | + bb.0: + renamable $f0s = VL32 %stack.0, 0, $noreg + renamable $f0s = LE %stack.1, 0, $noreg + Return + +... + +### Reorder the objects so that all accesses are in range. 
+# CHECK: After sorting by alignments = { 4, 1, 3, 2, 0, } +# CHECK-NEXT: Reordered SystemZ frame objects for function f5 +# CHECK-NEXT: Estim offs FI Align Size Density D12Freq DPairFr TotFreq +# CHECK-NEXT: 20658 0 1 8190 0.000122 0 1 1 +# CHECK-NEXT: 12468 2 1 8190 0.000122 0 1 1 +# CHECK-NEXT: 4278 3 1 8190 0.000122 0 1 1 +# CHECK-NEXT: 2231 1 1 2047 0.000489 1 0 1 +# CHECK-NEXT: 184 4 1 2047 0.000489 1 0 1 +# CHECK-NOT: LAY +# CHECK: LEY +# CHECK-NEXT: VL32 +# CHECK-NEXT: LEY +# CHECK-NEXT: LEY +# CHECK-NEXT: VL32 +--- +name: f5 +tracksRegLiveness: true +stack: + - { id: 0, size: 8190 } + - { id: 1, size: 2047 } + - { id: 2, size: 8190 } + - { id: 3, size: 8190 } + - { id: 4, size: 2047 } +machineFunctionInfo: {} +body: | + bb.0: + renamable $f2s = LE %stack.0, 0, $noreg + renamable $f0s = VL32 %stack.1, 0, $noreg + renamable $f3s = LEY %stack.2, 0, $noreg + renamable $f4s = LE %stack.3, 0, $noreg + renamable $f1s = VL32 %stack.4, 0, $noreg + Return + +... + +### Reorder the objects so that the VL32 object is in range and the LYs are +### shortened to Ls (STOC cannot be shortened). 
+# CHECK: After sorting by alignments = { 3, 2, 1, 0, } +# CHECK-NEXT: Reordered SystemZ frame objects for function f6 +# CHECK-NEXT: Estim offs FI Align Size Density D12Freq DPairFr TotFreq +# CHECK-NEXT: 18611 0 1 8190 0 0 0 0 +# CHECK-NEXT: 10421 1 1 8190 0 0 0 0 +# CHECK-NEXT: 2231 2 1 8190 0.000244 0 2 2 +# CHECK-NEXT: 184 3 1 2047 0.000489 1 0 1 +# CHECK-NOT: LAY +# CHECK: $r1l = L $r15 +# CHECK-NEXT: $r1l = L $r15 +# CHECK-NEXT: IMPLICIT_DEF +# CHECK-NEXT: STOC +# CHECK-NEXT: STOC +# CHECK-NEXT: VL32 +--- +name: f6 +tracksRegLiveness: true +stack: + - { id: 0, size: 8190 } + - { id: 1, size: 8190 } + - { id: 2, size: 8190 } + - { id: 3, size: 2047 } +machineFunctionInfo: {} +body: | + bb.0: + $r1l = LY %stack.2, 0, $noreg + $r1l = LY %stack.2, 0, $noreg + $cc = IMPLICIT_DEF + STOC $r1l, %stack.0, 0, 14, 8, implicit $cc + STOC $r1l, %stack.1, 0, 14, 8, implicit $cc + renamable $f3s = VL32 %stack.3, 0, $noreg + Return + +... + +### Test handling of a variable sized object. +# CHECK: Reordered SystemZ frame objects for function f7 +# CHECK-NEXT: Estim offs FI Align Size Density D12Freq DPairFr TotFreq +# CHECK-NEXT: 484 0 1 300 0.00333 1 0 1 +# CHECK-NEXT: 168 1 1 316 0.00316 1 0 1 +# CHECK-NEXT: 168 2 var +# CHECK: $r15d = AGHI $r15d(tied-def 0), -776, implicit-def dead $cc +# CHECK: $r11d = LGR $r15d +# CHECK: renamable $r2d = ADJDYNALLOC renamable $r1d, 0, $noreg +# CHECK: VST64 renamable $f0d, $r11d, 476, $noreg +# CHECK: VST32 renamable $f1s, $r11d, 160, $noreg +# CHECK: VST32 killed renamable $f0s, killed renamable $r2d, 0, $noreg +--- +name: f7 +tracksRegLiveness: true +stack: + - { id: 0, size: 300 } + - { id: 1, size: 316 } + - { id: 2, type: variable-sized } +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $f0d, $f0s, $f1s, $r2l + + renamable $r2l = KILL $r2l, implicit-def $r2d + renamable $r1d = RISBGN undef renamable $r1d, killed renamable $r2d, 30, 189, 2 + renamable $r0d = nuw LA killed renamable $r1d, 7, $noreg + renamable $r0d 
= RISBGN undef renamable $r0d, killed renamable $r0d, 29, 188, 0 + renamable $r1d = SGRK $r15d, killed renamable $r0d, implicit-def dead $cc + renamable $r2d = ADJDYNALLOC renamable $r1d, 0, $noreg + $r15d = COPY killed renamable $r1d + VST64 renamable $f0d, %stack.0, 0, $noreg + VST32 renamable $f1s, %stack.1, 0, $noreg + VST32 killed renamable $f0s, killed renamable $r2d, 0, $noreg + Return + +... + +### Test sorting of objects to minimize alignment gaps. +# CHECK: Reordered SystemZ frame objects for function f8 +# CHECK-NEXT: Estim offs FI Align Size Density D12Freq DPairFr TotFreq +# CHECK-NEXT: 208 7 1 5 0 0 0 0 +# CHECK-NEXT: 206 1 2 2 0 0 0 0 +# CHECK-NEXT: 204 4 2 2 0 0 0 0 +# CHECK-NEXT: 200 5 2 4 0 0 0 0 +# CHECK-NEXT: 196 2 4 4 0 0 0 0 +# CHECK-NEXT: 192 6 4 4 0 0 0 0 +# CHECK-NEXT: 176 0 8 16 0 0 0 0 +# CHECK-NEXT: 168 3 8 8 0 0 0 0 +--- +name: f8 +tracksRegLiveness: true +stack: + - { id: 0, size: 16, alignment: 8 } + - { id: 1, size: 1, alignment: 2 } + - { id: 2, size: 4, alignment: 4 } + - { id: 3, size: 8, alignment: 8 } + - { id: 4, size: 2, alignment: 2 } + - { id: 5, size: 3, alignment: 2 } + - { id: 6, size: 4, alignment: 4 } + - { id: 7, size: 5, alignment: 1 } +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + Return + +... + +### Test sorting involving both frequencies and alignments. +### FI(1), FI(0) and FI(2) should go first sorted by their frequencies as they +### have D12 users. FI(0) and FI(1) form one range sorted by alignments, then FI(2) +### is left in place, and FI(4) and FI(3) are then sorted by alignments above. 
+# CHECK: Reordered SystemZ frame objects for function f9 +# CHECK-NEXT: Estim offs FI Align Size Density D12Freq DPairFr TotFreq +# CHECK-NEXT: 205544 3 2 200002 1e-05 0 2 2 +# CHECK-NEXT: 5544 4 8 200000 5e-06 0 1 1 +# CHECK-NEXT: 3476 2 4 2068 0.000484 1 0 1 +# CHECK-NEXT: 2688 1 2 788 0.00254 2 0 2 +# CHECK-NEXT: 184 0 4 2504 0.000799 2 0 2 +--- +name: f9 +tracksRegLiveness: true +stack: + - { id: 0, size: 2502, alignment: 4 } + - { id: 1, size: 788, alignment: 2 } + - { id: 2, size: 2068, alignment: 4 } + - { id: 3, size: 200002, alignment: 2 } + - { id: 4, size: 200000, alignment: 8 } + +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $r1l + + renamable $f1s = VL32 %stack.1, 0, $noreg + renamable $f1s = VL32 %stack.1, 0, $noreg + renamable $f0s = VL32 %stack.0, 0, $noreg + renamable $f0s = VL32 %stack.0, 0, $noreg + renamable $f2s = VL32 %stack.2, 0, $noreg + $cc = IMPLICIT_DEF + $r4l = LY %stack.4, 0, $noreg + $r3l = LY %stack.3, 0, $noreg + $r3l = LY %stack.3, 0, $noreg + Return + +... + +### Test that FIs 0, 1, 2, 3 and 5 are all found to be in U12 range and sorted +### together for alignments (in their original order, the added alignment padding +### would have pushed one of the objects out of U12 range).
+# CHECK: Reordered SystemZ frame objects for function f10 +# CHECK-NEXT: Estim offs FI Align Size Density D12Freq DPairFr TotFreq +# CHECK-NEXT: 4092 4 4 1200 0 0 0 0 +# CHECK-NEXT: 4090 1 2 2 0.5 1 0 1 +# CHECK-NEXT: 4088 3 2 2 0.5 1 0 1 +# CHECK-NEXT: 1768 0 8 2320 0.000431 1 0 1 +# CHECK-NEXT: 256 2 8 1512 0.000661 1 0 1 +# CHECK-NEXT: 184 5 8 72 0.0139 1 0 1 + +--- +name: f10 +tracksRegLiveness: true +stack: + - { id: 0, size: 2320, alignment: 8 } + - { id: 1, size: 2, alignment: 2 } + - { id: 2, size: 1512, alignment: 8 } + - { id: 3, size: 2, alignment: 2 } + - { id: 4, size: 1200, alignment: 4 } + - { id: 5, size: 72, alignment: 8 } + +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $r1l + + renamable $f0s = VL32 %stack.0, 0, $noreg + renamable $f1s = VL32 %stack.1, 0, $noreg + renamable $f2s = VL32 %stack.2, 0, $noreg + renamable $f3s = VL32 %stack.3, 0, $noreg + renamable $f5s = VL32 %stack.5, 0, $noreg + Return + +... + +### Test packing some smaller less valued objects below a big object that will still +### be in range as it is only accessed with a small offset. +# CHECK: Sorted by D12 densities = { 1, 0, 2, } +# CHECK-NEXT: After packing below U12 = { 0, 2, 1, } +# CHECK-NEXT: After sorting by alignments = { 2, 0, 1, } +# CHECK-NEXT: Reordered SystemZ frame objects for function f11 +# CHECK-NEXT: Estim offs FI Align Size Density D12Freq DPairFr TotFreq +# CHECK-NEXT: 328 1 8 4096 0.000244 1 0 1 +# CHECK-NEXT: 312 0 2 16 0.0625 0 1 1 +# CHECK-NEXT: 184 2 4 128 0.00781 0 1 1 +# CHECK-NOT: LAY +# CHECK-NOT: LEY +--- +name: f11 +tracksRegLiveness: true +stack: + - { id: 0, size: 16, alignment: 2 } + - { id: 1, size: 4095, alignment: 8 } + - { id: 2, size: 128, alignment: 4 } +machineFunctionInfo: {} +body: | + bb.0: + renamable $f1s = VL32 %stack.1, 16, $noreg + renamable $f0s = LE %stack.0, 0, $noreg + renamable $f2s = LE %stack.2, 0, $noreg + Return + +...
+ +### Similar, but this time FI(1) is used with a bigger offset which makes only a +### smaller object fit. Since FI(1) needs to be aligned to 8, FI(2) does not fit. +### The out-of-range offset for FI(1) should be ignored. +# CHECK: Sorted by D12 densities = { 1, 2, 0, 3, } +# CHECK-NEXT: After packing below U12 = { 3, 1, 0, 2, } +# CHECK-NEXT: After sorting by alignments = { 3, 1, 2, 0, } +# CHECK-NEXT: Reordered SystemZ frame objects for function f12 +# CHECK-NEXT: Estim offs FI Align Size Density D12Freq DPairFr TotFreq +# CHECK-NEXT: 5200 0 2 16 0.312 0 5 5 +# CHECK-NEXT: 5192 2 4 8 0.375 0 3 3 +# CHECK-NEXT: 192 1 8_ 5000 0.0002 1 0 1 +# CHECK-NEXT: 184 3 4 4 0.25 0 1 1 +# CHECK: VL32 $r15d, 4084 +# CHECK: $f3s = LDE32 +--- +name: f12 +tracksRegLiveness: true +stack: + - { id: 0, size: 16, alignment: 2 } + - { id: 1, size: 5000, alignment: 8 } + - { id: 2, size: 8, alignment: 4 } + - { id: 3, size: 4, alignment: 4 } +machineFunctionInfo: {} +body: | + bb.0: + renamable $f1s = VL32 %stack.1, 3900, $noreg + renamable $f1s = VL32 %stack.1, 4900, $noreg + renamable $f0s = LE %stack.0, 0, $noreg + renamable $f0s = LE %stack.0, 0, $noreg + renamable $f0s = LE %stack.0, 0, $noreg + renamable $f0s = LE %stack.0, 0, $noreg + renamable $f0s = LE %stack.0, 0, $noreg + renamable $f2s = LE %stack.2, 0, $noreg + renamable $f2s = LE %stack.2, 0, $noreg + renamable $f2s = LE %stack.2, 0, $noreg + renamable $f3s = LE %stack.3, 0, $noreg + + Return + +... Index: llvm/test/CodeGen/SystemZ/stackmap.ll =================================================================== --- llvm/test/CodeGen/SystemZ/stackmap.ll +++ llvm/test/CodeGen/SystemZ/stackmap.ll @@ -470,8 +470,8 @@ store i64 12, i64* %metadata1 store i64 13, i64* %metadata1 call void (i64, i32, ...) 
@llvm.experimental.stackmap(i64 16, i32 0, i64* %metadata1) - %metadata2 = alloca i8, i32 4, align 8 - %metadata3 = alloca i16, i32 4, align 8 + %metadata2 = alloca i8, i32 4, align 4 + %metadata3 = alloca i16, i32 4, align 4 call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 17, i32 6, i8* null, i32 0, i8* %metadata2, i16* %metadata3) ret void } Index: llvm/test/CodeGen/X86/O0-pipeline.ll =================================================================== --- llvm/test/CodeGen/X86/O0-pipeline.ll +++ llvm/test/CodeGen/X86/O0-pipeline.ll @@ -53,6 +53,9 @@ ; CHECK-NEXT: Fixup Statepoint Caller Saved ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter +; CHECK-NEXT: MachineDominator Tree Construction +; CHECK-NEXT: Machine Natural Loop Construction +; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization ; CHECK-NEXT: Post-RA pseudo instruction expansion pass ; CHECK-NEXT: X86 pseudo instruction expansion pass