diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -79,6 +79,14 @@
     return StackId != TargetStackID::ScalableVector;
   }
 
+  /// Order the symbols in the local stack.
+  /// We want to place the local stack objects in some sort of sensible order.
+  /// The heuristic we use is to try and pack them according to their static
+  /// number of uses (i.e. how hot they are).
+  void
+  orderFrameObjects(const MachineFunction &MF,
+                    SmallVectorImpl<int> &ObjectsToAllocate) const override;
+
 protected:
   const RISCVSubtarget &STI;
 
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -453,6 +453,156 @@
                                         Comment.str());
 }
 
+namespace {
+// Struct used by orderFrameObjects to help sort the stack objects.
+struct RISCVFrameSortingObject {
+  bool IsValid = false;             // true if we care about this object.
+  unsigned ObjectIndex = 0;         // Index of Object into MFI list.
+  unsigned ObjectSize = 0;          // Size of Object in bytes.
+  Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
+  unsigned ObjectNumUses = 0;       // Object static number of uses.
+};
+
+// The comparison function we use for stable_sort to order our local
+// stack symbols. The current algorithm is to use an estimated
+// "density". This takes into consideration the size and number of
+// uses each object has in order to roughly minimize code size.
+// So, for example, an object of size 16B that is referenced 5 times
+// will get higher priority than 4B objects referenced 1 time.
+// The stack symbols with higher priority get a shorter offset relative
+// to sp/fp, so that the stack related instructions accessing them are
+// more likely to be compressed.
+
+struct RISCVFrameSortingComparator {
+  bool operator()(const RISCVFrameSortingObject &A,
+                  const RISCVFrameSortingObject &B) const {
+    // Returns true when A has to be ordered before B.
+    //
+    // For consistency in our comparison, all invalid objects are placed
+    // at the end. This also allows us to stop walking when we hit the
+    // first invalid item after it's all sorted.
+    if (!A.IsValid)
+      return false;
+    if (!B.IsValid)
+      return true;
+
+    // The density is calculated by doing :
+    //     (double)DensityA = A.ObjectNumUses / A.ObjectSize
+    //     (double)DensityB = B.ObjectNumUses / B.ObjectSize
+    // Since this approach may cause inconsistencies in
+    // the floating point <, >, == comparisons, depending on the floating
+    // point model with which the compiler was built, we're going
+    // to scale both sides by multiplying with
+    // A.ObjectSize * B.ObjectSize. This ends up factoring away
+    // the division and, with it, the need for any floating point
+    // arithmetic.
+    const uint64_t DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
+                                    static_cast<uint64_t>(B.ObjectSize);
+    const uint64_t DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
+                                    static_cast<uint64_t>(A.ObjectSize);
+
+    // If the two densities are equal, prioritize highest alignment
+    // objects. This allows for similar alignment objects
+    // to be packed together (given the same density).
+    // There's room for improvement here, also, since we can pack
+    // similar alignment (different density) objects next to each
+    // other to save padding. This will also require further
+    // complexity/iterations, and the overall gain isn't worth it,
+    // in general. Something to keep in mind, though.
+    if (DensityAScaled == DensityBScaled)
+      return A.ObjectAlignment < B.ObjectAlignment;
+
+    return DensityAScaled < DensityBScaled;
+  }
+};
+} // namespace
+
+// Return true if MI is a load or store for which there exists a compressed
+// version.
+static bool isCompressibleLdOrSt(const MachineInstr &MI) {
+  const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();
+  const unsigned Opcode = MI.getOpcode();
+
+  return Opcode == RISCV::LW || (!STI.is64Bit() && Opcode == RISCV::FLW) ||
+         Opcode == RISCV::LD || Opcode == RISCV::FLD || Opcode == RISCV::SW ||
+         (!STI.is64Bit() && Opcode == RISCV::FSW) || Opcode == RISCV::SD ||
+         Opcode == RISCV::FSD;
+}
+
+void RISCVFrameLowering::orderFrameObjects(
+    const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const RISCVRegisterInfo *RI = STI.getRegisterInfo();
+  // The reordering is only used to reduce code size.
+  if (!MF.getFunction().hasMinSize() || !STI.hasStdExtCOrZca())
+    return;
+  // Don't waste time if there's nothing to do.
+  if (ObjectsToAllocate.empty())
+    return;
+  // Create an array of all MFI objects. We won't need all of these
+  // objects, but we're going to create a full array of them to make
+  // it easier to index into when we're counting "uses" down below.
+  // We want to be able to easily/cheaply access an object by simply
+  // indexing into it, instead of having to search for it every time.
+  std::vector<RISCVFrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
+
+  // Walk the objects we care about and mark them as such in our working
+  // struct.
+  // The stack address of dynamic objects is not affected by the object
+  // order, so they do not need any special handling here.
+  for (int Obj : ObjectsToAllocate) {
+    SortingObjects[Obj].IsValid = true;
+    SortingObjects[Obj].ObjectIndex = Obj;
+    SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
+    SortingObjects[Obj].ObjectSize = MFI.getObjectSize(Obj);
+  }
+
+  // Count the number of uses for each object.
+  for (const auto &MBB : MF) {
+    for (const auto &MI : MBB) {
+      if (MI.isDebugInstr())
+        continue;
+      for (const MachineOperand &MO : MI.operands()) {
+        // Check to see if it's a local stack symbol.
+        if (!MO.isFI())
+          continue;
+        int Index = MO.getIndex();
+        // Check to see if it falls within our range, and is tagged
+        // to require ordering.
+        if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
+            SortingObjects[Index].IsValid) {
+          // A compressible ld/st is more likely to end up compressed, so
+          // it gets an estimated weight of 2; any other use counts as 1.
+          // The braces keep the nested condition free of a dangling else.
+          SortingObjects[Index].ObjectNumUses +=
+              isCompressibleLdOrSt(MI) ? 2 : 1;
+        }
+      }
+    }
+  }
+
+  // Sort the objects using RISCVFrameSortingComparator (see its comment for
+  // info).
+  llvm::stable_sort(SortingObjects, RISCVFrameSortingComparator());
+
+  // Now modify the original list to represent the final order that
+  // we want. The order will depend on whether we're going to access them
+  // from the stack pointer or the frame pointer. For SP, the list should
+  // end up with the END containing objects that we want with smaller offsets.
+  // For FP, it should be flipped.
+  int Slot = 0;
+  for (const auto &Obj : SortingObjects) {
+    // All invalid items are sorted at the end, so it's safe to stop.
+    if (!Obj.IsValid)
+      break;
+    ObjectsToAllocate[Slot++] = Obj.ObjectIndex;
+  }
+
+  // Flip it if we're accessing off of the FP.
+  if (!RI->hasStackRealignment(MF) && hasFP(MF))
+    std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
+}
+
 void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
   MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -1277,7 +1427,8 @@
 
 // We would like to split the SP adjustment to reduce prologue/epilogue
 // as following instructions. In this way, the offset of the callee saved
-// register could fit in a single store.
+// register could fit in a single store. Suppose that the first sp adjust
+// amount is 2032,
 //   add     sp,sp,-2032
 //   sw      ra,2028(sp)
 //   sw      s0,2024(sp)
@@ -1314,6 +1465,7 @@
     // offset that stack compression instructions accept when target supports
     // compression instructions.
if (STI.hasStdExtCOrZca()) { + // The compression extensions may support the following instructions, // riscv32: c.lwsp rd, offset[7:2] => 2^(6 + 2) // c.swsp rs2, offset[7:2] => 2^(6 + 2) // c.flwsp rd, offset[7:2] => 2^(6 + 2) @@ -1329,10 +1481,25 @@ // StackSize meets the condition (StackSize <= 2048 + RVCompressLen), // case1: Amount is 2048 - StackAlign: use addi + addi to adjust sp. // case2: Amount is RVCompressLen: use addi + addi to adjust sp. - if (StackSize <= 2047 + RVCompressLen || - (StackSize > 2048 * 2 - StackAlign && - StackSize <= 2047 * 2 + RVCompressLen) || - StackSize > 2048 * 3 - StackAlign) + auto CanCompress = [&](uint64_t CompressLen) -> bool { + if (StackSize <= 2047 + CompressLen || + (StackSize > 2048 * 2 - StackAlign && + StackSize <= 2047 * 2 + CompressLen) || + StackSize > 2048 * 3 - StackAlign) + return true; + + return false; + }; + // In the epilogue, addi sp, sp, 496 is used to recover the sp and it + // can be compressed(C.ADDI16SP, offset can be [-512, 496]), but + // addi sp, sp, 512 can not be compressed. so try to use 496 first. + // RVCompressLen - StackAlign = 512 - 16 = 496, it satisfies the + // requirement of RV64's stack alignment and is enough for the max + // callee size. 
+ if (STI.getXLen() == 64 && CanCompress(496)) + return 496; + + if (CanCompress(RVCompressLen)) return RVCompressLen; } return 2048 - StackAlign; diff --git a/llvm/test/CodeGen/RISCV/reorder-frame-objects.mir b/llvm/test/CodeGen/RISCV/reorder-frame-objects.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/reorder-frame-objects.mir @@ -0,0 +1,309 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -march=riscv64 -x mir -run-pass=prologepilog -stack-symbol-ordering=0 \ +# RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK-RV64-NO-REORDER %s +# RUN: llc -march=riscv64 -mattr=+c -x mir -run-pass=prologepilog \ +# RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK-RV64-REORDER %s +--- | + + define dso_local void @_Z12stack_use_spv() local_unnamed_addr #0 { + entry: + ret void + } + + declare dso_local void @_Z7callee0Pi(ptr noundef) local_unnamed_addr #0 + + declare dso_local void @_Z7callee1Pc(ptr noundef) local_unnamed_addr #0 + + define dso_local void @_Z12stack_use_fpjj(i32 noundef signext %m, i32 noundef signext %n) local_unnamed_addr #0 { + entry: + ret void + } + + attributes #0 = { minsize optsize } + +... 
+--- +name: _Z12stack_use_spv +alignment: 2 +tracksRegLiveness: true +tracksDebugUserValues: true +frameInfo: + maxAlignment: 4 + hasCalls: true + localFrameSize: 2072 +stack: + - { id: 0, size: 4, alignment: 4, local-offset: -4 } + - { id: 1, size: 1, alignment: 1, local-offset: -5 } + - { id: 2, size: 16, alignment: 4, local-offset: -24 } + - { id: 3, size: 2048, alignment: 4, local-offset: -2072 } +machineFunctionInfo: + varArgsFrameIndex: 0 + varArgsSaveSize: 0 +body: | + bb.0.entry: + ; CHECK-RV64-NO-REORDER-LABEL: name: _Z12stack_use_spv + ; CHECK-RV64-NO-REORDER: liveins: $x1 + ; CHECK-RV64-NO-REORDER-NEXT: {{ $}} + ; CHECK-RV64-NO-REORDER-NEXT: $x2 = frame-setup ADDI $x2, -2032 + ; CHECK-RV64-NO-REORDER-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2032 + ; CHECK-RV64-NO-REORDER-NEXT: SD killed $x1, $x2, 2024 :: (store (s64) into %stack.4) + ; CHECK-RV64-NO-REORDER-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8 + ; CHECK-RV64-NO-REORDER-NEXT: $x2 = frame-setup ADDI $x2, -64 + ; CHECK-RV64-NO-REORDER-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2096 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = ADDI $x2, 2047 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = ADDI killed $x10, 37 + ; CHECK-RV64-NO-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = ADDI $x2, 2047 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = ADDI killed $x10, 36 + ; CHECK-RV64-NO-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee1Pc, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = ADDI $x2, 2047 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = ADDI killed $x10, 17 + ; CHECK-RV64-NO-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = ADDI $x2, 2047 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = 
ADDI killed $x10, 17 + ; CHECK-RV64-NO-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = ADDI $x2, 2047 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = ADDI killed $x10, 17 + ; CHECK-RV64-NO-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = ADDI $x2, 2047 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = ADDI killed $x10, 17 + ; CHECK-RV64-NO-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = ADDI $x2, 2047 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = ADDI killed $x10, 17 + ; CHECK-RV64-NO-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = ADDI $x2, 16 + ; CHECK-RV64-NO-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ; CHECK-RV64-NO-REORDER-NEXT: $x2 = frame-destroy ADDI $x2, 64 + ; CHECK-RV64-NO-REORDER-NEXT: $x1 = LD $x2, 2024 :: (load (s64) from %stack.4) + ; CHECK-RV64-NO-REORDER-NEXT: $x2 = frame-destroy ADDI $x2, 2032 + ; CHECK-RV64-NO-REORDER-NEXT: PseudoRET + ; + ; CHECK-RV64-REORDER-LABEL: name: _Z12stack_use_spv + ; CHECK-RV64-REORDER: liveins: $x1 + ; CHECK-RV64-REORDER-NEXT: {{ $}} + ; CHECK-RV64-REORDER-NEXT: $x2 = frame-setup ADDI $x2, -496 + ; CHECK-RV64-REORDER-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 496 + ; CHECK-RV64-REORDER-NEXT: SD killed $x1, $x2, 488 :: (store (s64) into %stack.4) + ; CHECK-RV64-REORDER-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8 + ; CHECK-RV64-REORDER-NEXT: $x2 = frame-setup ADDI $x2, -1600 + ; CHECK-RV64-REORDER-NEXT: 
frame-setup CFI_INSTRUCTION def_cfa_offset 2096 + ; CHECK-RV64-REORDER-NEXT: $x10 = ADDI $x2, 36 + ; CHECK-RV64-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ; CHECK-RV64-REORDER-NEXT: $x10 = ADDI $x2, 19 + ; CHECK-RV64-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee1Pc, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ; CHECK-RV64-REORDER-NEXT: $x10 = ADDI $x2, 20 + ; CHECK-RV64-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ; CHECK-RV64-REORDER-NEXT: $x10 = ADDI $x2, 20 + ; CHECK-RV64-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ; CHECK-RV64-REORDER-NEXT: $x10 = ADDI $x2, 20 + ; CHECK-RV64-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ; CHECK-RV64-REORDER-NEXT: $x10 = ADDI $x2, 20 + ; CHECK-RV64-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ; CHECK-RV64-REORDER-NEXT: $x10 = ADDI $x2, 20 + ; CHECK-RV64-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ; CHECK-RV64-REORDER-NEXT: $x10 = ADDI $x2, 40 + ; CHECK-RV64-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ; CHECK-RV64-REORDER-NEXT: $x2 = frame-destroy ADDI $x2, 1600 + ; CHECK-RV64-REORDER-NEXT: $x1 = LD $x2, 488 :: (load (s64) from %stack.4) + ; CHECK-RV64-REORDER-NEXT: $x2 = frame-destroy ADDI $x2, 496 + ; CHECK-RV64-REORDER-NEXT: PseudoRET + ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, 
implicit $x2 + $x10 = ADDI %stack.0, 0 + PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 + $x10 = ADDI %stack.1, 0 + PseudoCALL target-flags(riscv-call) @_Z7callee1Pc, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 + $x10 = ADDI %stack.2, 0 + PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 + $x10 = ADDI %stack.2, 0 + PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 + $x10 = ADDI %stack.2, 0 + PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 + $x10 = ADDI %stack.2, 0 + PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 + $x10 = ADDI %stack.2, 0 + PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 + $x10 = ADDI %stack.3, 0 + PseudoCALL 
target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 + ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + PseudoRET + +... +--- +name: _Z12stack_use_fpjj +alignment: 2 +tracksRegLiveness: true +tracksDebugUserValues: true +liveins: + - { reg: '$x10' } + - { reg: '$x11' } +frameInfo: + maxAlignment: 4 + hasCalls: true + localFrameSize: 2068 +stack: + - { id: 0, size: 2064, alignment: 4, local-offset: -2064 } + - { id: 1, size: 4, alignment: 4, local-offset: -2068 } + - { id: 2, type: variable-sized, alignment: 1, local-offset: -2068 } + - { id: 3, type: variable-sized, alignment: 1, local-offset: -2068 } +machineFunctionInfo: + varArgsFrameIndex: 0 + varArgsSaveSize: 0 +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-RV64-NO-REORDER-LABEL: name: _Z12stack_use_fpjj + ; CHECK-RV64-NO-REORDER: liveins: $x10, $x11, $x1, $x9, $x18, $x19 + ; CHECK-RV64-NO-REORDER-NEXT: {{ $}} + ; CHECK-RV64-NO-REORDER-NEXT: $x2 = frame-setup ADDI $x2, -2032 + ; CHECK-RV64-NO-REORDER-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2032 + ; CHECK-RV64-NO-REORDER-NEXT: SD killed $x1, $x2, 2024 :: (store (s64) into %stack.4) + ; CHECK-RV64-NO-REORDER-NEXT: SD killed $x8, $x2, 2016 :: (store (s64) into %stack.5) + ; CHECK-RV64-NO-REORDER-NEXT: SD killed $x9, $x2, 2008 :: (store (s64) into %stack.6) + ; CHECK-RV64-NO-REORDER-NEXT: SD killed $x18, $x2, 2000 :: (store (s64) into %stack.7) + ; CHECK-RV64-NO-REORDER-NEXT: SD killed $x19, $x2, 1992 :: (store (s64) into %stack.8) + ; CHECK-RV64-NO-REORDER-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8 + ; CHECK-RV64-NO-REORDER-NEXT: frame-setup CFI_INSTRUCTION offset $x8, -16 + ; CHECK-RV64-NO-REORDER-NEXT: frame-setup CFI_INSTRUCTION offset $x9, -24 + ; CHECK-RV64-NO-REORDER-NEXT: frame-setup CFI_INSTRUCTION offset $x18, -32 + ; CHECK-RV64-NO-REORDER-NEXT: frame-setup CFI_INSTRUCTION offset $x19, -40 + ; CHECK-RV64-NO-REORDER-NEXT: $x8 = frame-setup ADDI $x2, 2032 + 
; CHECK-RV64-NO-REORDER-NEXT: frame-setup CFI_INSTRUCTION def_cfa $x8, 0 + ; CHECK-RV64-NO-REORDER-NEXT: $x2 = frame-setup ADDI $x2, -96 + ; CHECK-RV64-NO-REORDER-NEXT: renamable $x19 = COPY $x2 + ; CHECK-RV64-NO-REORDER-NEXT: renamable $x10 = SLLI killed renamable $x10, 32 + ; CHECK-RV64-NO-REORDER-NEXT: renamable $x10 = SRLI killed renamable $x10, 30 + ; CHECK-RV64-NO-REORDER-NEXT: renamable $x10 = nuw ADDI killed renamable $x10, 15 + ; CHECK-RV64-NO-REORDER-NEXT: renamable $x10 = ANDI killed renamable $x10, -16 + ; CHECK-RV64-NO-REORDER-NEXT: renamable $x18 = SUB $x2, killed renamable $x10 + ; CHECK-RV64-NO-REORDER-NEXT: $x2 = COPY renamable $x18 + ; CHECK-RV64-NO-REORDER-NEXT: renamable $x11 = SLLI killed renamable $x11, 32 + ; CHECK-RV64-NO-REORDER-NEXT: renamable $x11 = SRLI killed renamable $x11, 30 + ; CHECK-RV64-NO-REORDER-NEXT: renamable $x11 = nuw ADDI killed renamable $x11, 15 + ; CHECK-RV64-NO-REORDER-NEXT: renamable $x11 = ANDI killed renamable $x11, -16 + ; CHECK-RV64-NO-REORDER-NEXT: renamable $x9 = SUB $x2, killed renamable $x11 + ; CHECK-RV64-NO-REORDER-NEXT: $x2 = COPY renamable $x9 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = ADDI $x8, -2048 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = ADDI killed $x10, -64 + ; CHECK-RV64-NO-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = COPY killed renamable $x18 + ; CHECK-RV64-NO-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = COPY killed renamable $x9 + ; CHECK-RV64-NO-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = ADDI $x8, -2048 + ; CHECK-RV64-NO-REORDER-NEXT: $x10 = ADDI killed $x10, -68 + ; CHECK-RV64-NO-REORDER-NEXT: PseudoCALL 
target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2 + ; CHECK-RV64-NO-REORDER-NEXT: $x2 = COPY killed renamable $x19 + ; CHECK-RV64-NO-REORDER-NEXT: $x2 = frame-destroy ADDI $x8, -2048 + ; CHECK-RV64-NO-REORDER-NEXT: $x2 = frame-destroy ADDI killed $x2, -80 + ; CHECK-RV64-NO-REORDER-NEXT: $x2 = frame-destroy ADDI $x2, 96 + ; CHECK-RV64-NO-REORDER-NEXT: $x1 = LD $x2, 2024 :: (load (s64) from %stack.4) + ; CHECK-RV64-NO-REORDER-NEXT: $x8 = LD $x2, 2016 :: (load (s64) from %stack.5) + ; CHECK-RV64-NO-REORDER-NEXT: $x9 = LD $x2, 2008 :: (load (s64) from %stack.6) + ; CHECK-RV64-NO-REORDER-NEXT: $x18 = LD $x2, 2000 :: (load (s64) from %stack.7) + ; CHECK-RV64-NO-REORDER-NEXT: $x19 = LD $x2, 1992 :: (load (s64) from %stack.8) + ; CHECK-RV64-NO-REORDER-NEXT: $x2 = frame-destroy ADDI $x2, 2032 + ; CHECK-RV64-NO-REORDER-NEXT: PseudoRET + ; + ; CHECK-RV64-REORDER-LABEL: name: _Z12stack_use_fpjj + ; CHECK-RV64-REORDER: liveins: $x10, $x11, $x1, $x9, $x18, $x19 + ; CHECK-RV64-REORDER-NEXT: {{ $}} + ; CHECK-RV64-REORDER-NEXT: $x2 = frame-setup ADDI $x2, -496 + ; CHECK-RV64-REORDER-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 496 + ; CHECK-RV64-REORDER-NEXT: SD killed $x1, $x2, 488 :: (store (s64) into %stack.4) + ; CHECK-RV64-REORDER-NEXT: SD killed $x8, $x2, 480 :: (store (s64) into %stack.5) + ; CHECK-RV64-REORDER-NEXT: SD killed $x9, $x2, 472 :: (store (s64) into %stack.6) + ; CHECK-RV64-REORDER-NEXT: SD killed $x18, $x2, 464 :: (store (s64) into %stack.7) + ; CHECK-RV64-REORDER-NEXT: SD killed $x19, $x2, 456 :: (store (s64) into %stack.8) + ; CHECK-RV64-REORDER-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8 + ; CHECK-RV64-REORDER-NEXT: frame-setup CFI_INSTRUCTION offset $x8, -16 + ; CHECK-RV64-REORDER-NEXT: frame-setup CFI_INSTRUCTION offset $x9, -24 + ; CHECK-RV64-REORDER-NEXT: frame-setup CFI_INSTRUCTION offset $x18, -32 + ; CHECK-RV64-REORDER-NEXT: frame-setup CFI_INSTRUCTION offset $x19, -40 + ; 
CHECK-RV64-REORDER-NEXT: $x8 = frame-setup ADDI $x2, 496 + ; CHECK-RV64-REORDER-NEXT: frame-setup CFI_INSTRUCTION def_cfa $x8, 0 + ; CHECK-RV64-REORDER-NEXT: $x2 = frame-setup ADDI $x2, -1632 + ; CHECK-RV64-REORDER-NEXT: renamable $x19 = COPY $x2 + ; CHECK-RV64-REORDER-NEXT: renamable $x10 = SLLI killed renamable $x10, 32 + ; CHECK-RV64-REORDER-NEXT: renamable $x10 = SRLI killed renamable $x10, 30 + ; CHECK-RV64-REORDER-NEXT: renamable $x10 = nuw ADDI killed renamable $x10, 15 + ; CHECK-RV64-REORDER-NEXT: renamable $x10 = ANDI killed renamable $x10, -16 + ; CHECK-RV64-REORDER-NEXT: renamable $x18 = SUB $x2, killed renamable $x10 + ; CHECK-RV64-REORDER-NEXT: $x2 = COPY renamable $x18 + ; CHECK-RV64-REORDER-NEXT: renamable $x11 = SLLI killed renamable $x11, 32 + ; CHECK-RV64-REORDER-NEXT: renamable $x11 = SRLI killed renamable $x11, 30 + ; CHECK-RV64-REORDER-NEXT: renamable $x11 = nuw ADDI killed renamable $x11, 15 + ; CHECK-RV64-REORDER-NEXT: renamable $x11 = ANDI killed renamable $x11, -16 + ; CHECK-RV64-REORDER-NEXT: renamable $x9 = SUB $x2, killed renamable $x11 + ; CHECK-RV64-REORDER-NEXT: $x2 = COPY renamable $x9 + ; CHECK-RV64-REORDER-NEXT: $x10 = ADDI $x8, -2048 + ; CHECK-RV64-REORDER-NEXT: $x10 = ADDI killed $x10, -68 + ; CHECK-RV64-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2 + ; CHECK-RV64-REORDER-NEXT: $x10 = COPY killed renamable $x18 + ; CHECK-RV64-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2 + ; CHECK-RV64-REORDER-NEXT: $x10 = COPY killed renamable $x9 + ; CHECK-RV64-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2 + ; CHECK-RV64-REORDER-NEXT: $x10 = ADDI $x8, -52 + ; CHECK-RV64-REORDER-NEXT: PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, 
implicit $x10, implicit-def $x2 + ; CHECK-RV64-REORDER-NEXT: $x2 = COPY killed renamable $x19 + ; CHECK-RV64-REORDER-NEXT: $x2 = frame-destroy ADDI $x8, -2048 + ; CHECK-RV64-REORDER-NEXT: $x2 = frame-destroy ADDI killed $x2, -80 + ; CHECK-RV64-REORDER-NEXT: $x2 = frame-destroy ADDI $x2, 1632 + ; CHECK-RV64-REORDER-NEXT: $x1 = LD $x2, 488 :: (load (s64) from %stack.4) + ; CHECK-RV64-REORDER-NEXT: $x8 = LD $x2, 480 :: (load (s64) from %stack.5) + ; CHECK-RV64-REORDER-NEXT: $x9 = LD $x2, 472 :: (load (s64) from %stack.6) + ; CHECK-RV64-REORDER-NEXT: $x18 = LD $x2, 464 :: (load (s64) from %stack.7) + ; CHECK-RV64-REORDER-NEXT: $x19 = LD $x2, 456 :: (load (s64) from %stack.8) + ; CHECK-RV64-REORDER-NEXT: $x2 = frame-destroy ADDI $x2, 496 + ; CHECK-RV64-REORDER-NEXT: PseudoRET + renamable $x19 = COPY $x2 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 + renamable $x10 = SLLI killed renamable $x10, 32 + renamable $x10 = SRLI killed renamable $x10, 30 + renamable $x10 = nuw ADDI killed renamable $x10, 15 + renamable $x10 = ANDI killed renamable $x10, -16 + renamable $x18 = SUB $x2, killed renamable $x10 + $x2 = COPY renamable $x18 + ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 + renamable $x11 = SLLI killed renamable $x11, 32 + renamable $x11 = SRLI killed renamable $x11, 30 + renamable $x11 = nuw ADDI killed renamable $x11, 15 + renamable $x11 = ANDI killed renamable $x11, -16 + renamable $x9 = SUB $x2, killed renamable $x11 + $x2 = COPY renamable $x9 + ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 + $x10 = ADDI %stack.0, 0 + PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2 + ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 + $x10 = COPY killed renamable $x18 + PseudoCALL 
target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2 + ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 + $x10 = COPY killed renamable $x9 + PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2 + ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 + $x10 = ADDI %stack.1, 0 + PseudoCALL target-flags(riscv-call) @_Z7callee0Pi, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2 + ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + $x2 = COPY killed renamable $x19 + PseudoRET + +... diff --git a/llvm/test/CodeGen/RISCV/stack-inst-compress.mir b/llvm/test/CodeGen/RISCV/stack-inst-compress.mir --- a/llvm/test/CodeGen/RISCV/stack-inst-compress.mir +++ b/llvm/test/CodeGen/RISCV/stack-inst-compress.mir @@ -92,17 +92,17 @@ ; CHECK-RV64-COM-LABEL: name: _Z15stack_size_2048v ; CHECK-RV64-COM: liveins: $x1 ; CHECK-RV64-COM-NEXT: {{ $}} - ; CHECK-RV64-COM-NEXT: $x2 = frame-setup ADDI $x2, -512 - ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 512 - ; CHECK-RV64-COM-NEXT: SD killed $x1, $x2, 504 :: (store (s64) into %stack.1) + ; CHECK-RV64-COM-NEXT: $x2 = frame-setup ADDI $x2, -496 + ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 496 + ; CHECK-RV64-COM-NEXT: SD killed $x1, $x2, 488 :: (store (s64) into %stack.1) ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8 - ; CHECK-RV64-COM-NEXT: $x2 = frame-setup ADDI $x2, -1552 + ; CHECK-RV64-COM-NEXT: $x2 = frame-setup ADDI $x2, -1568 ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2064 ; CHECK-RV64-COM-NEXT: renamable $x10 = ADDI $x2, 8 ; CHECK-RV64-COM-NEXT: PseudoCALL target-flags(riscv-call) @_Z6calleePi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def 
$x2 - ; CHECK-RV64-COM-NEXT: $x2 = frame-destroy ADDI $x2, 1552 - ; CHECK-RV64-COM-NEXT: $x1 = LD $x2, 504 :: (load (s64) from %stack.1) - ; CHECK-RV64-COM-NEXT: $x2 = frame-destroy ADDI $x2, 512 + ; CHECK-RV64-COM-NEXT: $x2 = frame-destroy ADDI $x2, 1568 + ; CHECK-RV64-COM-NEXT: $x1 = LD $x2, 488 :: (load (s64) from %stack.1) + ; CHECK-RV64-COM-NEXT: $x2 = frame-destroy ADDI $x2, 496 ; CHECK-RV64-COM-NEXT: PseudoRET ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 renamable $x10 = ADDI %stack.0, 0 @@ -183,19 +183,19 @@ ; CHECK-RV64-COM-LABEL: name: _Z15stack_size_4096v ; CHECK-RV64-COM: liveins: $x1 ; CHECK-RV64-COM-NEXT: {{ $}} - ; CHECK-RV64-COM-NEXT: $x2 = frame-setup ADDI $x2, -512 - ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 512 - ; CHECK-RV64-COM-NEXT: SD killed $x1, $x2, 504 :: (store (s64) into %stack.1) + ; CHECK-RV64-COM-NEXT: $x2 = frame-setup ADDI $x2, -496 + ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 496 + ; CHECK-RV64-COM-NEXT: SD killed $x1, $x2, 488 :: (store (s64) into %stack.1) ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8 ; CHECK-RV64-COM-NEXT: $x2 = frame-setup ADDI $x2, -2048 - ; CHECK-RV64-COM-NEXT: $x2 = frame-setup ADDI killed $x2, -1552 + ; CHECK-RV64-COM-NEXT: $x2 = frame-setup ADDI killed $x2, -1568 ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 4112 ; CHECK-RV64-COM-NEXT: renamable $x10 = ADDI $x2, 8 ; CHECK-RV64-COM-NEXT: PseudoCALL target-flags(riscv-call) @_Z6calleePi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 ; CHECK-RV64-COM-NEXT: $x2 = frame-destroy ADDI $x2, 2032 - ; CHECK-RV64-COM-NEXT: $x2 = frame-destroy ADDI killed $x2, 1568 - ; CHECK-RV64-COM-NEXT: $x1 = LD $x2, 504 :: (load (s64) from %stack.1) - ; CHECK-RV64-COM-NEXT: $x2 = frame-destroy ADDI $x2, 512 + ; CHECK-RV64-COM-NEXT: $x2 = frame-destroy ADDI killed $x2, 1584 + ; CHECK-RV64-COM-NEXT: $x1 = LD $x2, 488 :: (load (s64) from %stack.1) + ; 
CHECK-RV64-COM-NEXT: $x2 = frame-destroy ADDI $x2, 496 ; CHECK-RV64-COM-NEXT: PseudoRET ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 renamable $x10 = ADDI %stack.0, 0 @@ -282,21 +282,21 @@ ; CHECK-RV64-COM-LABEL: name: _Z15stack_size_8192v ; CHECK-RV64-COM: liveins: $x1 ; CHECK-RV64-COM-NEXT: {{ $}} - ; CHECK-RV64-COM-NEXT: $x2 = frame-setup ADDI $x2, -512 - ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 512 - ; CHECK-RV64-COM-NEXT: SD killed $x1, $x2, 504 :: (store (s64) into %stack.1) + ; CHECK-RV64-COM-NEXT: $x2 = frame-setup ADDI $x2, -496 + ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 496 + ; CHECK-RV64-COM-NEXT: SD killed $x1, $x2, 488 :: (store (s64) into %stack.1) ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8 ; CHECK-RV64-COM-NEXT: $x10 = frame-setup LUI 2 - ; CHECK-RV64-COM-NEXT: $x10 = frame-setup ADDIW killed $x10, -496 + ; CHECK-RV64-COM-NEXT: $x10 = frame-setup ADDIW killed $x10, -480 ; CHECK-RV64-COM-NEXT: $x2 = frame-setup SUB $x2, killed $x10 ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8208 ; CHECK-RV64-COM-NEXT: renamable $x10 = ADDI $x2, 8 ; CHECK-RV64-COM-NEXT: PseudoCALL target-flags(riscv-call) @_Z6calleePi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2 ; CHECK-RV64-COM-NEXT: $x10 = frame-destroy LUI 2 - ; CHECK-RV64-COM-NEXT: $x10 = frame-destroy ADDIW killed $x10, -496 + ; CHECK-RV64-COM-NEXT: $x10 = frame-destroy ADDIW killed $x10, -480 ; CHECK-RV64-COM-NEXT: $x2 = frame-destroy ADD $x2, killed $x10 - ; CHECK-RV64-COM-NEXT: $x1 = LD $x2, 504 :: (load (s64) from %stack.1) - ; CHECK-RV64-COM-NEXT: $x2 = frame-destroy ADDI $x2, 512 + ; CHECK-RV64-COM-NEXT: $x1 = LD $x2, 488 :: (load (s64) from %stack.1) + ; CHECK-RV64-COM-NEXT: $x2 = frame-destroy ADDI $x2, 496 ; CHECK-RV64-COM-NEXT: PseudoRET ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 renamable $x10 = ADDI %stack.0, 0