diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -88,6 +88,7 @@
   X86VZeroUpper.cpp
   X86WinEHState.cpp
   X86InsertWait.cpp
+  X86ArgumentStackSlotRebase.cpp
   )
 
 add_llvm_target(X86CodeGen ${sources}
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -162,6 +162,7 @@
 FunctionPass *createX86LoadValueInjectionRetHardeningPass();
 FunctionPass *createX86SpeculativeLoadHardeningPass();
 FunctionPass *createX86SpeculativeExecutionSideEffectSuppression();
+FunctionPass *createX86ArgumentStackSlotPass();
 
 void initializeEvexToVexInstPassPass(PassRegistry &);
 void initializeFPSPass(PassRegistry &);
@@ -194,6 +195,7 @@
 void initializeX86SpeculativeExecutionSideEffectSuppressionPass(PassRegistry &);
 void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
 void initializeX86TileConfigPass(PassRegistry &);
+void initializeX86ArgumentStackSlotPassPass(PassRegistry &);
 
 namespace X86AS {
 enum : unsigned {
diff --git a/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp b/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp
@@ -0,0 +1,177 @@
+//===- X86ArgumentStackSlotRebase.cpp - Rebase argument stack slots ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces the frame register with a GPR virtual register in
+// instructions that reference argument stack slots.
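+// It runs when the base pointer would normally be required (the stack needs
+// realignment) but inline assembly clobbers the base pointer register: the
+// incoming arguments are then reachable through neither the realigned stack
+// pointer nor the base pointer, so the incoming stack pointer is captured in
+// a scratch register and the argument slots are addressed from it.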
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86argumentstackrebase"
+
+namespace {
+
+class X86ArgumentStackSlotPass : public MachineFunctionPass {
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  explicit X86ArgumentStackSlotPass() : MachineFunctionPass(ID) {
+    initializeX86ArgumentStackSlotPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+} // end anonymous namespace
+
+char X86ArgumentStackSlotPass::ID = 0;
+
+INITIALIZE_PASS(X86ArgumentStackSlotPass, DEBUG_TYPE, "Argument Stack Rebase",
+                false, false)
+
+FunctionPass *llvm::createX86ArgumentStackSlotPass() {
+  return new X86ArgumentStackSlotPass();
+}
+
+static Register getScratchReg(MachineFunction &MF) {
+  // FIXME: Should a scratch register be chosen per calling convention, to
+  // avoid clobbering an argument register? What if it is regcall? It seems
+  // only ebx/rbx is the candidate.
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  const Function &F = MF.getFunction();
+  CallingConv::ID CC = F.getCallingConv();
+  switch (CC) {
+  case CallingConv::X86_RegCall:
+    return STI.is64Bit() ? X86::RBX : X86::EBX;
+  default:
+    if (STI.is64Bit()) {
+      return X86::R10;
+    } else {
+      const TargetRegisterClass *RC = &X86::GR32_BSIRegClass;
+      return MRI.createVirtualRegister(RC);
+    }
+  }
+}
+
+bool X86ArgumentStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
+  const Function &F = MF.getFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  const X86RegisterInfo *TRI = STI.getRegisterInfo();
+  const X86InstrInfo *TII = STI.getInstrInfo();
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  bool Changed = false;
+
+  if (F.hasFnAttribute(Attribute::Naked))
+    return false;
+  if (!STI.isTargetLinux())
+    return false;
+  if (!TRI->hasBasePointer(MF))
+    return false;
+
+  Register BasePtr = TRI->getBaseRegister();
+  auto IsBaseRegisterClobbered = [&]() {
+    for (MachineBasicBlock &MBB : MF) {
+      for (MachineInstr &MI : MBB) {
+        if (!MI.isInlineAsm())
+          continue;
+        for (MachineOperand &MO : MI.operands()) {
+          if (!MO.isReg())
+            continue;
+          Register Reg = MO.getReg();
+          if (!Register::isPhysicalRegister(Reg))
+            continue;
+          if (TRI->isSuperOrSubRegisterEq(BasePtr, Reg))
+            return true;
+        }
+      }
+    }
+    return false;
+  };
+  if (!IsBaseRegisterClobbered())
+    return false;
+
+  Register ArgBaseReg = getScratchReg(MF);
+  // leal 4(%esp), %reg
+  // FIXME: Will the instruction be duplicated or eliminated? Should we
+  // define a pseudo instruction for it?
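+  // At function entry the stack pointer points at the return address, so the
+  // LEA below computes SP plus one slot (4 bytes on 32-bit, 8 on 64-bit):
+  // the lowest address of the fixed argument area, from which the rebased
+  // slot references are addressed.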
+  MachineBasicBlock &MBB = MF.front();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  DebugLoc DL;
+  // Emit an instruction to copy the stack pointer to a virtual register,
+  // and save that instruction in the X86 machine function info. We can get
+  // the physical register of ArgBaseReg after register allocation.
+  MachineInstr *LEA;
+  if (STI.is64Bit())
+    LEA = BuildMI(MBB, MBBI, DL, TII->get(X86::LEA64r), ArgBaseReg)
+              .addUse(X86::RSP)
+              .addImm(1)
+              .addUse(X86::NoRegister)
+              .addImm(8)
+              .addUse(X86::NoRegister)
+              .setMIFlag(MachineInstr::FrameSetup);
+  else
+    LEA = BuildMI(MBB, MBBI, DL, TII->get(X86::LEA32r), ArgBaseReg)
+              .addUse(X86::ESP)
+              .addImm(1)
+              .addUse(X86::NoRegister)
+              .addImm(4)
+              .addUse(X86::NoRegister)
+              .setMIFlag(MachineInstr::FrameSetup);
+  X86FI->setStackPtrSaveMI(LEA);
+
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      int I = 0;
+      for (MachineOperand &MO : MI.operands()) {
+        if (MO.isFI()) {
+          int Idx = MO.getIndex();
+          if (!MFI.isFixedObjectIndex(Idx)) {
+            ++I;
+            continue;
+          }
+          int64_t Offset = MFI.getObjectOffset(Idx);
+          if (Offset < 0) {
+            ++I;
+            continue;
+          }
+          // Replace the frame index operand with the general virtual
+          // register.
+          MO.ChangeToRegister(ArgBaseReg, false);
+          // Fill in the offset of the stack slot.
+          MI.getOperand(I + 3).setImm(Offset);
+          Changed = true;
+        }
+        ++I;
+      }
+    }
+  }
+
+  return Changed;
+}
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -33,6 +33,7 @@
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Target/TargetOptions.h"
 #include <cstdlib>
@@ -476,6 +477,7 @@
   MachineFrameInfo &MFI = MF.getFrameInfo();
   MachineModuleInfo &MMI = MF.getMMI();
   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
 
   // Add callee saved registers to move list.
   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
@@ -487,8 +489,33 @@
     unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
 
     if (IsPrologue) {
-      BuildCFI(MBB, MBBI, DL,
-               MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+      if (X86FI->getStackPtrSaveMI()) {
+        // +2*SlotSize because there are the return address and ebp at the
+        // bottom of the stack.
+        // |  retaddr  |
+        // |    ebp    |
+        // |           |<--ebp
+        Offset += 2 * SlotSize;
+        SmallString<64> CfaExpr;
+        CfaExpr.push_back(dwarf::DW_CFA_expression);
+        uint8_t buffer[16];
+        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
+        CfaExpr.push_back(2);
+        Register FramePtr = TRI->getFrameRegister(MF);
+        const Register MachineFramePtr =
+            STI.isTarget64BitILP32()
+                ? Register(getX86SubSuperRegister(FramePtr, 64))
+                : FramePtr;
+        unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
+        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
+        CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
+                 MachineInstr::FrameSetup);
+      } else {
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+      }
     } else {
       BuildCFI(MBB, MBBI, DL,
                MCCFIInstruction::createRestore(nullptr, DwarfReg));
@@ -1509,6 +1536,53 @@
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
   DebugLoc DL;
+  Register ArgBaseReg;
+
+  // Emit extra prolog for argument stack slot references.
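+  // The scratch register is redefined here, ahead of the realignment AND,
+  // and the return address (one slot below the saved base address) is
+  // pushed again so that a copy also lives on the realigned stack.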
+  if (auto *MI = X86FI->getStackPtrSaveMI()) {
+    // MI is the LEA instruction created in X86ArgumentStackSlotPass.
+    // Create an extra prolog for stack realignment:
+    //   leal 4(%esp), %basereg
+    //   .cfi_def_cfa %basereg, 0
+    //   andl $-128, %esp
+    //   pushl -4(%basereg)
+    ArgBaseReg = MI->getOperand(0).getReg();
+    if (Is64Bit) {
+      // leaq 8(%rsp), %reg
+      BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), ArgBaseReg)
+          .addUse(X86::RSP)
+          .addImm(1)
+          .addUse(X86::NoRegister)
+          .addImm(8)
+          .addUse(X86::NoRegister)
+          .setMIFlag(MachineInstr::FrameSetup);
+    } else {
+      // leal 4(%esp), %reg
+      BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), ArgBaseReg)
+          .addUse(X86::ESP)
+          .addImm(1)
+          .addUse(X86::NoRegister)
+          .addImm(4)
+          .addUse(X86::NoRegister)
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+    if (NeedsDwarfCFI) {
+      // .cfi_def_cfa %basereg, 0
+      unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
+      BuildCFI(MBB, MBBI, DL,
+               MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
+               MachineInstr::FrameSetup);
+    }
+    BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
+    int64_t Offset = Is64Bit ? -8 : -4;
+    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
+        .addReg(ArgBaseReg)
+        .addImm(1)
+        .addReg(X86::NoRegister)
+        .addImm(Offset)
+        .addReg(X86::NoRegister)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
 
   // Space reserved for stack-based arguments when making a (ABI-guaranteed)
   // tail call.
@@ -1640,7 +1714,7 @@
         .addReg(MachineFramePtr, RegState::Kill)
         .setMIFlag(MachineInstr::FrameSetup);
 
-    if (NeedsDwarfCFI) {
+    if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
       // Mark the place where EBP/RBP was saved.
       // Define the current CFA rule to use the provided offset.
       assert(StackSize);
@@ -1715,13 +1789,27 @@
         .setMIFlag(MachineInstr::FrameSetup);
 
     if (NeedsDwarfCFI) {
-      // Mark effective beginning of when frame pointer becomes valid.
-      // Define the current CFA to use the EBP/RBP register.
-      unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
-      BuildCFI(
-          MBB, MBBI, DL,
-          MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
-          MachineInstr::FrameSetup);
+      if (ArgBaseReg.isValid()) {
+        SmallString<64> CfaExpr;
+        CfaExpr.push_back(dwarf::DW_CFA_expression);
+        uint8_t buffer[16];
+        unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
+        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
+        CfaExpr.push_back(2);
+        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
+        CfaExpr.push_back(0);
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
+                 MachineInstr::FrameSetup);
+      } else {
+        // Mark effective beginning of when frame pointer becomes valid.
+        // Define the current CFA to use the EBP/RBP register.
+        unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
+        BuildCFI(
+            MBB, MBBI, DL,
+            MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
+            MachineInstr::FrameSetup);
+      }
     }
 
     if (NeedsWinFPO) {
@@ -1788,7 +1876,8 @@
   // Realign stack after we pushed callee-saved registers (so that we'll be
   // able to calculate their offsets from the frame pointer).
   // Don't do this for Win64, it needs to realign the stack after the prologue.
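+  // If ArgBaseReg is valid, the extra prolog above has already realigned the
+  // stack, so don't realign it again here.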
-  if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) {
+  if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
+      !ArgBaseReg.isValid()) {
     assert(HasFP && "There should be a frame pointer if stack is realigned.");
     BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
 
@@ -2194,6 +2283,37 @@
                           !MF.getTarget().getTargetTriple().isOSWindows()) &&
                          MF.needsFrameMoves();
 
+  Register ArgBaseReg;
+  if (auto *MI = X86FI->getStackPtrSaveMI()) {
+    unsigned Opc = X86::LEA32r;
+    Register StackReg = X86::ESP;
+    ArgBaseReg = MI->getOperand(0).getReg();
+    if (STI.is64Bit()) {
+      Opc = X86::LEA64r;
+      StackReg = X86::RSP;
+    }
+    // leal -4(%basereg), %esp
+    // .cfi_def_cfa %esp, 4
+    BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
+        .addUse(ArgBaseReg)
+        .addImm(1)
+        .addUse(X86::NoRegister)
+        .addImm(-(int64_t)SlotSize)
+        .addUse(X86::NoRegister)
+        .setMIFlag(MachineInstr::FrameDestroy);
+    if (NeedsDwarfCFI) {
+      unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
+      BuildCFI(MBB, MBBI, DL,
+               MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
+               MachineInstr::FrameDestroy);
+      --MBBI;
+    }
+    --MBBI;
+    // We don't need the stack pointer saving instruction any more.
+    MI->eraseFromParent();
+    X86FI->setStackPtrSaveMI(nullptr);
+  }
+
   if (IsFunclet) {
     assert(HasFP && "EH funclets without FP not yet implemented");
     NumBytes = getWinEHFuncletFrameSize(MF);
@@ -2235,11 +2355,13 @@
   }
 
   if (NeedsDwarfCFI) {
-    unsigned DwarfStackPtr =
-        TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
-    BuildCFI(MBB, MBBI, DL,
-             MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
-             MachineInstr::FrameDestroy);
+    if (!ArgBaseReg.isValid()) {
+      unsigned DwarfStackPtr =
+          TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
+      BuildCFI(MBB, MBBI, DL,
+               MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
+               MachineInstr::FrameDestroy);
+    }
     if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
       unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
       BuildCFI(MBB, AfterPop, DL,
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -117,6 +117,8 @@
   /// determine if we should insert tilerelease in frame lowering.
   bool HasVirtualTileReg = false;
 
+  MachineInstr *StackPtrSaveMI = nullptr;
+
   std::optional<int> SwiftAsyncContextFrameIdx;
 
   // Preallocated fields are only used during isel.
@@ -225,6 +227,9 @@
   bool hasVirtualTileReg() const { return HasVirtualTileReg; }
   void setHasVirtualTileReg(bool v) { HasVirtualTileReg = v; }
 
+  void setStackPtrSaveMI(MachineInstr *MI) { StackPtrSaveMI = MI; }
+  MachineInstr *getStackPtrSaveMI() const { return StackPtrSaveMI; }
+
   std::optional<int> getSwiftAsyncContextFrameIdx() const {
     return SwiftAsyncContextFrameIdx;
   }
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -702,6 +702,10 @@
 bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
   const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
 
+  // We have a scheme to reference the argument stack slots, so we don't
+  // need a base pointer.
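+  // StackPtrSaveMI is only set by X86ArgumentStackSlotPass, once it has
+  // rebased the argument stack slots onto a scratch register.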
+  if (X86FI->getStackPtrSaveMI() != nullptr)
+    return false;
+
   if (X86FI->hasPreallocatedCall())
     return true;
 
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -433,6 +433,11 @@
 (add RAX, RCX, RDX, RSI, RDI, R8, R9, RBX, R14, R15, R12, R13, RBP)>;
 
+// FIXME: This should be the intersection of the win64 and linux64 registers
+// that are not used to pass/return arguments.
+// def GR64_NotArg : RegisterClass<"X86", [i64], 64, (add R10, R11, RBX,
+//                                                    R12, R13, R14, R15)>;
+
 // Segment registers for use by MOV instructions (and others) that have a
 // segment register as one operand.  Always contain a 16-bit segment
 // descriptor.
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -104,6 +104,7 @@
   initializePseudoProbeInserterPass(PR);
   initializeX86ReturnThunksPass(PR);
   initializeX86DAGToDAGISelPass(PR);
+  initializeX86ArgumentStackSlotPassPass(PR);
 }
 
 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -518,6 +519,7 @@
 }
 
 void X86PassConfig::addPreRegAlloc() {
+  addPass(createX86ArgumentStackSlotPass());
   if (getOptLevel() != CodeGenOpt::None) {
     addPass(&LiveRangeShrinkID);
     addPass(createX86FixupSetCC());
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -41,6 +41,7 @@
 ; CHECK-NEXT: X86 PIC Global Base Reg Initialization
 ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
 ; CHECK-NEXT: Local Stack Slot Allocation
+; CHECK-NEXT: Argument Stack Rebase
 ; CHECK-NEXT: X86 speculative load hardening
 ; CHECK-NEXT: MachineDominator Tree Construction
 ; CHECK-NEXT: X86 EFLAGS copy lowering
diff --git a/llvm/test/CodeGen/X86/i386-baseptr.ll b/llvm/test/CodeGen/X86/i386-baseptr.ll
--- a/llvm/test/CodeGen/X86/i386-baseptr.ll
+++ b/llvm/test/CodeGen/X86/i386-baseptr.ll
@@ -6,11 +6,15 @@
 ; CHECK-LABEL: base:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: .cfi_offset %ebp, -8
 ; CHECK-NEXT: movl %esp, %ebp
+; CHECK-NEXT: .cfi_def_cfa_register %ebp
 ; CHECK-NEXT: pushl %esi
 ; CHECK-NEXT: andl $-32, %esp
 ; CHECK-NEXT: subl $32, %esp
 ; CHECK-NEXT: movl %esp, %esi
+; CHECK-NEXT: .cfi_offset %esi, -12
 ; CHECK-NEXT: calll helper@PLT
 ; CHECK-NEXT: movl %esp, %ecx
 ; CHECK-NEXT: leal 31(,%eax,4), %eax
@@ -23,6 +27,7 @@
 ; CHECK-NEXT: leal -4(%ebp), %esp
 ; CHECK-NEXT: popl %esi
 ; CHECK-NEXT: popl %ebp
+; CHECK-NEXT: .cfi_def_cfa %esp, 4
 ; CHECK-NEXT: retl
 entry:
   %k = call i32 @helper()
@@ -34,12 +39,18 @@
 define void @clobber_base() #0 {
 ; CHECK-LABEL: clobber_base:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: leal {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT: .cfi_def_cfa %ebx, 0
+; CHECK-NEXT: andl $-128, %esp
+; CHECK-NEXT: pushl -4(%ebx)
 ; CHECK-NEXT: pushl %ebp
 ; CHECK-NEXT: movl %esp, %ebp
+; CHECK-NEXT: .cfi_escape 0x10, 0x05, 0x02, 0x75, 0x00 #
+; CHECK-NEXT: pushl %ebx
 ; CHECK-NEXT: pushl %esi
-; CHECK-NEXT: andl $-128, %esp
-; CHECK-NEXT: subl $128, %esp
-; CHECK-NEXT: movl %esp, %esi
+; CHECK-NEXT: subl $112, %esp
+; CHECK-NEXT: .cfi_escape 0x10, 0x06, 0x02, 0x75, 0x78 #
+; CHECK-NEXT: .cfi_escape 0x10, 0x03, 0x02, 0x75, 0x7c #
 ; CHECK-NEXT: calll helper@PLT
 ; CHECK-NEXT: movl %esp, %ecx
 ; CHECK-NEXT: leal 31(,%eax,4), %eax
@@ -54,12 +65,15 @@
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: movl $8, %edx
 ; CHECK-NEXT: #APP
-; CHECK-NEXT: movl %edx, (%esi)
+; CHECK-NEXT: movl %edx, -120(%ebp)
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: movl $0, (%ecx,%eax)
-; CHECK-NEXT: leal -4(%ebp), %esp
+; CHECK-NEXT: leal -8(%ebp), %esp
 ; CHECK-NEXT: popl %esi
+; CHECK-NEXT: popl %ebx
 ; CHECK-NEXT: popl %ebp
+; CHECK-NEXT: leal -4(%ebx), %esp
+; CHECK-NEXT: .cfi_def_cfa %esp, 4
 ; CHECK-NEXT: retl
 entry:
   %k = call i32 @helper()
@@ -72,6 +86,6 @@
   ret void
 }
 
-attributes #0 = { nounwind "frame-pointer"="all"}
+attributes #0 = { "frame-pointer"="all"}
 !llvm.module.flags = !{!0}
 !0 = !{i32 2, !"override-stack-alignment", i32 32}
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -116,6 +116,7 @@
 ; CHECK-NEXT: Machine code sinking
 ; CHECK-NEXT: Peephole Optimizations
 ; CHECK-NEXT: Remove dead machine instructions
+; CHECK-NEXT: Argument Stack Rebase
 ; CHECK-NEXT: Live Range Shrink
 ; CHECK-NEXT: X86 Fixup SetCC
 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis
diff --git a/llvm/test/CodeGen/X86/swifttail-realign.ll b/llvm/test/CodeGen/X86/swifttail-realign.ll
--- a/llvm/test/CodeGen/X86/swifttail-realign.ll
+++ b/llvm/test/CodeGen/X86/swifttail-realign.ll
@@ -6,11 +6,11 @@
 define swifttailcc void @caller(i64 %n) {
 ; CHECK-LABEL: caller:
+; CHECK: andq $-32, %rsp
 ; CHECK: subq $16, %rsp
 ; CHECK: pushq %rbp
 ; CHECK: movq %rsp, %rbp
 ; CHECK: pushq %rbx
-; CHECK: andq $-32, %rsp
 
 ; [... don't really care what happens to rsp to allocate %ptr ...]
 ; CHECK: movq 24(%rbp), [[RETADDR:%.*]]
 ; CHECK: movq [[RETADDR]], 8(%rbp)
diff --git a/llvm/test/CodeGen/X86/x86-64-baseptr.ll b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
--- a/llvm/test/CodeGen/X86/x86-64-baseptr.ll
+++ b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
@@ -15,11 +15,15 @@
 ; CHECK-LABEL: base:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbp, -16
 ; CHECK-NEXT: movq %rsp, %rbp
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
 ; CHECK-NEXT: pushq %rbx
 ; CHECK-NEXT: andq $-32, %rsp
 ; CHECK-NEXT: subq $32, %rsp
 ; CHECK-NEXT: movq %rsp, %rbx
+; CHECK-NEXT: .cfi_offset %rbx, -24
 ; CHECK-NEXT: callq helper@PLT
 ; CHECK-NEXT: movq %rsp, %rcx
 ; CHECK-NEXT: movl %eax, %eax
@@ -33,16 +37,21 @@
 ; CHECK-NEXT: leaq -8(%rbp), %rsp
 ; CHECK-NEXT: popq %rbx
 ; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
 ; CHECK-NEXT: retq
 ;
 ; X32ABI-LABEL: base:
 ; X32ABI: # %bb.0: # %entry
 ; X32ABI-NEXT: pushq %rbp
+; X32ABI-NEXT: .cfi_def_cfa_offset 16
+; X32ABI-NEXT: .cfi_offset %rbp, -16
 ; X32ABI-NEXT: movl %esp, %ebp
+; X32ABI-NEXT: .cfi_def_cfa_register %rbp
 ; X32ABI-NEXT: pushq %rbx
 ; X32ABI-NEXT: andl $-32, %esp
 ; X32ABI-NEXT: subl $32, %esp
 ; X32ABI-NEXT: movl %esp, %ebx
+; X32ABI-NEXT: .cfi_offset %rbx, -24
 ; X32ABI-NEXT: callq helper@PLT
 ; X32ABI-NEXT: # kill: def $eax killed $eax def $rax
 ; X32ABI-NEXT: leal 31(,%rax,4), %eax
@@ -56,6 +65,7 @@
 ; X32ABI-NEXT: leal -8(%ebp), %esp
 ; X32ABI-NEXT: popq %rbx
 ; X32ABI-NEXT: popq %rbp
+; X32ABI-NEXT: .cfi_def_cfa %rsp, 8
 ; X32ABI-NEXT: retq
 entry:
   %k = call i32 @helper()
@@ -67,12 +77,16 @@
 define void @clobber_base() #0 {
 ; CHECK-LABEL: clobber_base:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %r10
+; CHECK-NEXT: .cfi_def_cfa %r10, 0
+; CHECK-NEXT: andq $-128, %rsp
+; CHECK-NEXT: pushq -8(%r10)
 ; CHECK-NEXT: pushq %rbp
 ; CHECK-NEXT: movq %rsp, %rbp
+; CHECK-NEXT: .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 #
 ; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: andq $-128, %rsp
-; CHECK-NEXT: subq $128, %rsp
-; CHECK-NEXT: movq %rsp, %rbx
+; CHECK-NEXT: subq $104, %rsp
+; CHECK-NEXT: .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 #
 ; CHECK-NEXT: callq helper@PLT
 ; CHECK-NEXT: movq %rsp, %rcx
 ; CHECK-NEXT: movl %eax, %eax
@@ -88,22 +102,28 @@
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: movl $8, %edx
 ; CHECK-NEXT: #APP
-; CHECK-NEXT: movl %edx, (%rbx)
+; CHECK-NEXT: movl %edx, -112(%rbp)
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: movl $0, (%rcx,%rax)
 ; CHECK-NEXT: leaq -8(%rbp), %rsp
 ; CHECK-NEXT: popq %rbx
 ; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: leaq -8(%r10), %rsp
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
 ; CHECK-NEXT: retq
 ;
 ; X32ABI-LABEL: clobber_base:
 ; X32ABI: # %bb.0: # %entry
+; X32ABI-NEXT: leaq {{[0-9]+}}(%rsp), %r10
+; X32ABI-NEXT: .cfi_def_cfa %r10, 0
+; X32ABI-NEXT: andl $-128, %esp
+; X32ABI-NEXT: pushq -8(%r10)
 ; X32ABI-NEXT: pushq %rbp
 ; X32ABI-NEXT: movl %esp, %ebp
+; X32ABI-NEXT: .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 #
 ; X32ABI-NEXT: pushq %rbx
-; X32ABI-NEXT: andl $-128, %esp
-; X32ABI-NEXT: subl $128, %esp
-; X32ABI-NEXT: movl %esp, %ebx
+; X32ABI-NEXT: subl $104, %esp
+; X32ABI-NEXT: .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 #
 ; X32ABI-NEXT: callq helper@PLT
 ; X32ABI-NEXT: # kill: def $eax killed $eax def $rax
 ; X32ABI-NEXT: leal 31(,%rax,4), %eax
@@ -119,12 +139,14 @@
 ; X32ABI-NEXT: #NO_APP
 ; X32ABI-NEXT: movl $8, %edx
 ; X32ABI-NEXT: #APP
-; X32ABI-NEXT: movl %edx, (%ebx)
+; X32ABI-NEXT: movl %edx, -112(%ebp)
 ; X32ABI-NEXT: #NO_APP
 ; X32ABI-NEXT: movl $0, (%ecx,%eax)
 ; X32ABI-NEXT: leal -8(%ebp), %esp
 ; X32ABI-NEXT: popq %rbx
 ; X32ABI-NEXT: popq %rbp
+; X32ABI-NEXT: leaq -8(%r10), %rsp
+; X32ABI-NEXT: .cfi_def_cfa %rsp, 8
 ; X32ABI-NEXT: retq
 entry:
   %k = call i32 @helper()
@@ -137,6 +159,6 @@
   ret void
 }
 
-attributes #0 = { nounwind "frame-pointer"="all"}
+attributes #0 = {"frame-pointer"="all"}
 !llvm.module.flags = !{!0}
 !0 = !{i32 2, !"override-stack-alignment", i32 32}