Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -45,6 +45,8 @@
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI) const override;
+  void orderFrameObjects(const MachineFunction &MF,
+                 SmallVectorImpl<int> &ObjectsToAllocate) const override;
 };
 
 class SystemZELFFrameLowering : public SystemZFrameLowering {
Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -80,6 +80,149 @@
   }
 }
 
+// EXPERIMENTAL
+#include "llvm/Support/CommandLine.h"
+static cl::opt<bool> CHECK_FREE_IDX("check-free-idx", cl::init(false));
+
+namespace {
+struct SZFrameSortingObject {
+  bool IsValid = false;         // true if we care about this Object.
+  uint32_t ObjectIndex = 0;     // Index of Object into MFI list.
+  uint32_t ObjectSize = 0;      // Size of Object in bytes.
+  uint32_t NumD12 = 0;          // Num 12-bit displacement operands.
+  uint32_t NumDPair = 0;        // Num displacements having both variants.
+  uint32_t NumD20 = 0;          // Num 20-bit displacement operands.
+  uint32_t NumNoFreeIdxReg = 0; // Num users without a free index register op.
+};
+
+struct SZFrameSortingComparator {
+  inline bool operator()(const SZFrameSortingObject &A,
+                         const SZFrameSortingObject &B) const {
+    // Sort by "density", computed as (Num / ObjectSize): an object with a
+    // higher density compares greater and so ends up later in the list. The
+    // fractions are compared after multiplying both sides with
+    // A.ObjectSize * B.ObjectSize, in order to eliminate the (fp) divisions.
+
+    // Place all invalid objects at the end.
+    if (!A.IsValid)
+      return false;
+    if (!B.IsValid)
+      return true;
+
+    // Variable sized objects are not considered.
+    if (!A.ObjectSize || !B.ObjectSize)
+      return A.ObjectSize > B.ObjectSize;
+
+    // Make the best use of 12-bit displacements (highest density sorts last).
+    if (A.NumD12 || B.NumD12) {
+      uint64_t A_D12 = uint64_t(A.NumD12) * uint64_t(B.ObjectSize);
+      uint64_t B_D12 = uint64_t(B.NumD12) * uint64_t(A.ObjectSize);
+      return A_D12 < B_D12;
+    }
+
+    // If all displacements will be in 20-bit range, favor the use of 12-bit
+    // displacements through shortening.
+    if (A.NumDPair || B.NumDPair) {
+      uint64_t A_DPair = uint64_t(A.NumDPair) * uint64_t(B.ObjectSize);
+      uint64_t B_DPair = uint64_t(B.NumDPair) * uint64_t(A.ObjectSize);
+      return A_DPair < B_DPair;
+    }
+
+    // Make the most use of the 20-bit displacement range.
+    if (A.NumD20 || B.NumD20) {
+      // EXPERIMENTAL: If the offset goes out of a 20-bit range, it will cost
+      // 2 instructions if there is not a free index reg. Check if this
+      // matters with CHECK_FREE_IDX (unlikely as it is an immediate load)...
+      unsigned AScore = A.NumD20 + (CHECK_FREE_IDX ? A.NumNoFreeIdxReg : 0);
+      unsigned BScore = B.NumD20 + (CHECK_FREE_IDX ? B.NumNoFreeIdxReg : 0);
+      uint64_t A_D20 = uint64_t(AScore) * uint64_t(B.ObjectSize);
+      uint64_t B_D20 = uint64_t(BScore) * uint64_t(A.ObjectSize);
+      return A_D20 < B_D20;
+    }
+
+    return A.ObjectIndex < B.ObjectIndex;
+  }
+};
+} // namespace
+
+void SystemZFrameLowering::orderFrameObjects(const MachineFunction &MF,
+                            SmallVectorImpl<int> &ObjectsToAllocate) const {
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const SystemZInstrInfo *TII =
+    static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+  // Make a vector of sorting objects with all MFI objects and mark those to
+  // be sorted as valid.
+  int64_t TotalSizeToAllocate = 0;
+  if (ObjectsToAllocate.size() <= 1)
+    return;
+  std::vector<SZFrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
+  for (auto &Obj : ObjectsToAllocate) {
+    SortingObjects[Obj].IsValid = true;
+    SortingObjects[Obj].ObjectIndex = Obj;
+    unsigned Size = MFI.getObjectSize(Obj);
+    SortingObjects[Obj].ObjectSize = Size;
+    TotalSizeToAllocate += Size;
+  }
+
+  bool AllInD20Range = TotalSizeToAllocate <= maxIntN(20);
+
+  // Examine uses for each interesting object and record legal displacements.
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
+      if (MI.isDebugInstr())
+        continue;
+      for (unsigned MOIdx = 0; MOIdx < MI.getNumOperands(); MOIdx++) {
+        const MachineOperand &MO = MI.getOperand(MOIdx);
+        if (!MO.isFI())
+          continue;
+        // Address-forming users (e.g. LA/LAY) take an FI but are not mem ops.
+        unsigned Opcode = MI.getOpcode();
+        const MCInstrDesc &MCID = TII->get(Opcode);
+        int Index = MO.getIndex();
+        if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
+            SortingObjects[Index].IsValid) {
+          // Check type of displacement operand.
+          bool HasDisplPair = TII->hasDisplacementPairInsn(Opcode);
+          if (AllInD20Range) {
+            if (HasDisplPair)
+              SortingObjects[Index].NumDPair++;
+            else if (!(MCID.TSFlags & SystemZII::Has20BitOffset))
+              SortingObjects[Index].NumD12++;
+          } else {
+            if ((MCID.TSFlags & SystemZII::Has20BitOffset) || HasDisplPair) {
+              SortingObjects[Index].NumD20++;
+              // Check for a free index register.
+              bool HasIndexReg = MCID.TSFlags & SystemZII::HasIndex;
+              if (!HasIndexReg || MI.getOperand(MOIdx + 2).getReg() != 0)
+                SortingObjects[Index].NumNoFreeIdxReg++;
+            }
+            else
+              SortingObjects[Index].NumD12++;
+          }
+        }
+      }
+    }
+  }
+
+  // Do an initial sorting.
+  llvm::stable_sort(SortingObjects, SZFrameSortingComparator());
+
+  // TODO: Walk through the list and change priority at 4k.
+  // TODO: Try second sort by original offsets into objects.
+
+  // Now modify the original list to represent the final order that
+  // we want.
+  unsigned Idx = 0;
+  for (auto &Obj : SortingObjects) {
+    // All invalid items are sorted at the end, so it's safe to stop.
+    if (!Obj.IsValid)
+      break;
+    ObjectsToAllocate[Idx++] = Obj.ObjectIndex;
+  }
+  assert(Idx == ObjectsToAllocate.size() && "Broken sorting of elements.");
+}
+
 bool SystemZFrameLowering::hasReservedCallFrame(
     const MachineFunction &MF) const {
   // The ELF ABI requires us to allocate 160 bytes of stack space for the
Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -312,6 +312,9 @@
   // exists.
   unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const;
 
+  // Return true if Opcode has a mapping in 12 <-> 20 displacements.
+  bool hasDisplacementPairInsn(unsigned Opcode) const;
+
   // If Opcode is a load instruction that has a LOAD AND TEST form,
   // return the opcode for the testing form, otherwise return 0.
   unsigned getLoadAndTest(unsigned Opcode) const;
Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1652,6 +1652,13 @@
   return 0;
 }
 
+bool SystemZInstrInfo::hasDisplacementPairInsn(unsigned Opcode) const {
+  const MCInstrDesc &MCID = get(Opcode);
+  if (MCID.TSFlags & SystemZII::Has20BitOffset)
+    return SystemZ::getDisp12Opcode(Opcode) >= 0;
+  return SystemZ::getDisp20Opcode(Opcode) >= 0;
+}
+
 unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
   switch (Opcode) {
   case SystemZ::L: return SystemZ::LT;
Index: llvm/test/CodeGen/SystemZ/frame-27.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/frame-27.mir
@@ -0,0 +1,54 @@
+# RUN: llc -mtriple=s390x-linux-gnu -start-before=prologepilog %s -o - \
+# RUN:   -print-after=prologepilog -verify-machineinstrs 2>&1 | FileCheck %s
+# REQUIRES: asserts
+#
+# Test that stack objects are placed in a good way with respect to the
+# displacement operands of users.
+
+--- |
+  define void @f1() { ret void }
+  define void @f2() { ret void }
+
+...
+# Test that %stack.0 is placed close to its D12 user.
+# CHECK:      # *** IR Dump After Prologue/Epilogue Insertion
+# CHECK:      # Machine code for function f1
+# CHECK:      fi#0: size=16, align=4, at location [SP-4272]
+# CHECK:      fi#1: size=4096, align=2, at location [SP-4256]
+# CHECK:      $r15d = AGHI $r15d(tied-def 0), -4288
+# CHECK-NOT:  LAY
+# CHECK:      renamable $f0s = VL32 $r15d, 176, $noreg
+---
+name:            f1
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 16, alignment: 4 }
+  - { id: 1, size: 4096, alignment: 2 }
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    renamable $f0s = VL32 %stack.0, 0, $noreg
+    Return implicit $f0s
+
+...
+# Test that %stack.1 is placed close to its D12 user.
+# CHECK:      # *** IR Dump After Prologue/Epilogue Insertion
+# CHECK:      # Machine code for function f2
+# CHECK:      fi#0: size=16, align=4, at location [SP-176]
+# CHECK:      fi#1: size=4096, align=2, at location [SP-4272]
+# CHECK:      $r15d = AGHI $r15d(tied-def 0), -4288
+# CHECK-NOT:  LAY
+# CHECK:      renamable $f0s = VL32 $r15d, 176, $noreg
+---
+name:            f2
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 16, alignment: 4 }
+  - { id: 1, size: 4096, alignment: 2 }
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    renamable $f0s = VL32 %stack.1, 0, $noreg
+    Return implicit $f0s
+
+...