diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -89,6 +89,7 @@
   X86VZeroUpper.cpp
   X86WinEHState.cpp
   X86InsertWait.cpp
+  X86ArgumentStackSlotRebase.cpp
   )
 
 add_llvm_target(X86CodeGen ${sources}
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -166,6 +166,7 @@
 FunctionPass *createX86LoadValueInjectionRetHardeningPass();
 FunctionPass *createX86SpeculativeLoadHardeningPass();
 FunctionPass *createX86SpeculativeExecutionSideEffectSuppression();
+FunctionPass *createX86ArgumentStackSlotPass();
 
 void initializeEvexToVexInstPassPass(PassRegistry &);
 void initializeFPSPass(PassRegistry &);
@@ -199,6 +200,7 @@
 void initializeX86SpeculativeExecutionSideEffectSuppressionPass(PassRegistry &);
 void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
 void initializeX86TileConfigPass(PassRegistry &);
+void initializeX86ArgumentStackSlotPassPass(PassRegistry &);
 
 namespace X86AS {
 enum : unsigned {
diff --git a/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp b/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp
@@ -0,0 +1,180 @@
+//===---- X86ArgumentStackSlotRebase.cpp - rebase argument stack slot -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces the frame register with a GPR virtual register and sets
+// the stack offset for each instruction that references an argument on the
+// stack.
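+// It is used when inline assembly clobbers the base pointer that would
+// otherwise address incoming stack arguments while the stack is realigned:
+// the arguments are instead addressed through a virtual register that is
+// initialized from the incoming stack pointer.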
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86argumentstackrebase"
+
+namespace {
+
+class X86ArgumentStackSlotPass : public MachineFunctionPass {
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  explicit X86ArgumentStackSlotPass() : MachineFunctionPass(ID) {
+    initializeX86ArgumentStackSlotPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+} // end anonymous namespace
+
+char X86ArgumentStackSlotPass::ID = 0;
+
+INITIALIZE_PASS(X86ArgumentStackSlotPass, DEBUG_TYPE, "Argument Stack Rebase",
+                false, false)
+
+FunctionPass *llvm::createX86ArgumentStackSlotPass() {
+  return new X86ArgumentStackSlotPass();
+}
+
+static Register getArgBaseReg(MachineFunction &MF) {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  const Function &F = MF.getFunction();
+  CallingConv::ID CC = F.getCallingConv();
+  switch (CC) {
+  // We need a virtual register in case inline assembly clobbers ebx/rbx.
+  case CallingConv::X86_RegCall: {
+    const TargetRegisterClass *RC = STI.is64Bit() ? &X86::GR64_RC_NoArgRegClass
+                                                  : &X86::GR32_RC_NoArgRegClass;
+    return MRI.createVirtualRegister(RC);
+  }
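+  // The register classes chosen here contain only registers that are never
+  // used to pass or return arguments, so the chosen base register cannot
+  // collide with a live incoming argument.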
+  // TODO: Refine register class for each calling convention.
+  default: {
+    const TargetRegisterClass *RC =
+        STI.is64Bit() ? &X86::GR64_NoArgRegClass : &X86::GR32_BSIRegClass;
+    return MRI.createVirtualRegister(RC);
+  }
+  }
+}
+
+bool X86ArgumentStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
+  const Function &F = MF.getFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  const X86RegisterInfo *TRI = STI.getRegisterInfo();
+  const X86InstrInfo *TII = STI.getInstrInfo();
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  bool Changed = false;
+
+  if (F.hasFnAttribute(Attribute::Naked))
+    return false;
+  // Only supported on Linux.
+  if (!STI.isTargetLinux())
+    return false;
+  if (!TRI->hasBasePointer(MF))
+    return false;
+
+  Register BasePtr = TRI->getBaseRegister();
+  auto IsBaseRegisterClobbered = [&]() {
+    for (MachineBasicBlock &MBB : MF) {
+      for (MachineInstr &MI : MBB) {
+        if (!MI.isInlineAsm())
+          continue;
+        for (MachineOperand &MO : MI.operands()) {
+          if (!MO.isReg())
+            continue;
+          Register Reg = MO.getReg();
+          if (!Register::isPhysicalRegister(Reg))
+            continue;
+          if (TRI->isSuperOrSubRegisterEq(BasePtr, Reg))
+            return true;
+        }
+      }
+    }
+    return false;
+  };
+  if (!IsBaseRegisterClobbered())
+    return false;
+
+  Register ArgBaseReg = getArgBaseReg(MF);
+  // leal 4(%esp), %reg
+  // FIXME: will the instruction be duplicated or eliminated? Should we
+  // define a pseudo instruction for it?
+  MachineBasicBlock &MBB = MF.front();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  DebugLoc DL;
+  // Emit an instruction to copy the stack pointer to a virtual register
+  // and save that instruction to X86 machine function info. We can get the
+  // physical register of ArgBaseReg after register allocation.
+  MachineInstr *LEA;
+  if (STI.is64Bit())
+    LEA = BuildMI(MBB, MBBI, DL, TII->get(X86::LEA64r), ArgBaseReg)
+              .addUse(X86::RSP)
+              .addImm(1)
+              .addUse(X86::NoRegister)
+              .addImm(8)
+              .addUse(X86::NoRegister)
+              .setMIFlag(MachineInstr::FrameSetup);
+  else
+    LEA = BuildMI(MBB, MBBI, DL, TII->get(X86::LEA32r), ArgBaseReg)
+              .addUse(X86::ESP)
+              .addImm(1)
+              .addUse(X86::NoRegister)
+              .addImm(4)
+              .addUse(X86::NoRegister)
+              .setMIFlag(MachineInstr::FrameSetup);
+  X86FI->setStackPtrSaveMI(LEA);
+
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      int I = 0;
+      for (MachineOperand &MO : MI.operands()) {
+        if (MO.isFI()) {
+          int Idx = MO.getIndex();
+          if (!MFI.isFixedObjectIndex(Idx))
+            continue;
+          int64_t Offset = MFI.getObjectOffset(Idx);
+          if (Offset < 0)
+            continue;
+          // Replace the frame register with the GPR virtual register.
+          MO.ChangeToRegister(ArgBaseReg, false);
+          // Fill in the offset of the stack slot.
+          MI.getOperand(I + 3).setImm(Offset);
+          Changed = true;
+        }
+        ++I;
+      }
+    }
+  }
+
+  return Changed;
+}
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -33,6 +33,7 @@
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Target/TargetOptions.h"
 #include <cstdlib>
@@ -476,6 +477,7 @@
   MachineFrameInfo &MFI = MF.getFrameInfo();
   MachineModuleInfo &MMI = MF.getMMI();
   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
 
   // Add callee saved registers to move list.
   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
@@ -487,8 +489,33 @@
     unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
     if (IsPrologue) {
-      BuildCFI(MBB, MBBI, DL,
-               MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+      if (X86FI->getStackPtrSaveMI()) {
+        // +2*SlotSize because the return address and the saved ebp sit at
+        // the bottom of the stack.
+        // | retaddr |
+        // |   ebp   |
+        // |         |<--ebp
+        Offset += 2 * SlotSize;
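+        // Emit the rule as a raw DW_CFA_expression escape:
+        //   DW_CFA_expression <reg> <length=2> DW_OP_breg<framereg> <offset>
+        // i.e. the callee-saved register <reg> is saved <offset> bytes from
+        // the frame pointer, since the CFA is not at a fixed offset from
+        // the stack pointer in this prologue.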
+        SmallString<64> CfaExpr;
+        CfaExpr.push_back(dwarf::DW_CFA_expression);
+        uint8_t buffer[16];
+        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
+        CfaExpr.push_back(2);
+        Register FramePtr = TRI->getFrameRegister(MF);
+        const Register MachineFramePtr =
+            STI.isTarget64BitILP32()
+                ? Register(getX86SubSuperRegister(FramePtr, 64))
+                : FramePtr;
+        unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
+        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
+        CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
+                 MachineInstr::FrameSetup);
+      } else {
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+      }
     } else {
       BuildCFI(MBB, MBBI, DL,
                MCCFIInstruction::createRestore(nullptr, DwarfReg));
@@ -1509,6 +1536,53 @@
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
   DebugLoc DL;
+  Register ArgBaseReg;
+
+  // Emit an extra prolog for argument stack slot references.
+  if (auto *MI = X86FI->getStackPtrSaveMI()) {
+    // MI is the LEA instruction created in X86ArgumentStackSlotPass.
+    // Create an extra prolog for stack realignment.
+    // leal 4(%esp), %basereg
+    // .cfi_def_cfa %basereg, 0
+    // andl $-128, %esp
+    // pushl -4(%basereg)
+    ArgBaseReg = MI->getOperand(0).getReg();
+    if (Is64Bit) {
+      // leaq 8(%rsp), %reg
+      BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), ArgBaseReg)
+          .addUse(X86::RSP)
+          .addImm(1)
+          .addUse(X86::NoRegister)
+          .addImm(8)
+          .addUse(X86::NoRegister)
+          .setMIFlag(MachineInstr::FrameSetup);
+    } else {
+      // leal 4(%esp), %reg
+      BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), ArgBaseReg)
+          .addUse(X86::ESP)
+          .addImm(1)
+          .addUse(X86::NoRegister)
+          .addImm(4)
+          .addUse(X86::NoRegister)
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+    if (NeedsDwarfCFI) {
+      // .cfi_def_cfa %basereg, 0
+      unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
+      BuildCFI(MBB, MBBI, DL,
+               MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
+               MachineInstr::FrameSetup);
+    }
+    BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
+    int64_t Offset = Is64Bit ? -8 : -4;
+    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
+        .addReg(ArgBaseReg)
+        .addImm(1)
+        .addReg(X86::NoRegister)
+        .addImm(Offset)
+        .addReg(X86::NoRegister)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
 
   // Space reserved for stack-based arguments when making a (ABI-guaranteed)
   // tail call.
@@ -1640,7 +1714,7 @@
         .addReg(MachineFramePtr, RegState::Kill)
         .setMIFlag(MachineInstr::FrameSetup);
 
-    if (NeedsDwarfCFI) {
+    if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
       // Mark the place where EBP/RBP was saved.
       // Define the current CFA rule to use the provided offset.
       assert(StackSize);
@@ -1717,13 +1791,27 @@
         .setMIFlag(MachineInstr::FrameSetup);
 
     if (NeedsDwarfCFI) {
-      // Mark effective beginning of when frame pointer becomes valid.
-      // Define the current CFA to use the EBP/RBP register.
-      unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
-      BuildCFI(
-          MBB, MBBI, DL,
-          MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
-          MachineInstr::FrameSetup);
+      if (ArgBaseReg.isValid()) {
+        SmallString<64> CfaExpr;
+        CfaExpr.push_back(dwarf::DW_CFA_expression);
+        uint8_t buffer[16];
+        unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
+        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
+        CfaExpr.push_back(2);
+        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
+        CfaExpr.push_back(0);
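+        // Expression: the previous frame pointer value is saved at
+        // [framereg + 0], which is where `push %ebp; mov %esp, %ebp`
+        // stored it.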
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
+                 MachineInstr::FrameSetup);
+      } else {
+        // Mark effective beginning of when frame pointer becomes valid.
+        // Define the current CFA to use the EBP/RBP register.
+        unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
+        BuildCFI(
+            MBB, MBBI, DL,
+            MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
+            MachineInstr::FrameSetup);
+      }
     }
 
     if (NeedsWinFPO) {
@@ -1790,7 +1878,8 @@
   // Realign stack after we pushed callee-saved registers (so that we'll be
   // able to calculate their offsets from the frame pointer).
   // Don't do this for Win64, it needs to realign the stack after the prologue.
-  if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) {
+  if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
+      !ArgBaseReg.isValid()) {
     assert(HasFP && "There should be a frame pointer if stack is realigned.");
     BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
 
@@ -2196,6 +2285,37 @@
                        !MF.getTarget().getTargetTriple().isOSWindows()) &&
                       MF.needsFrameMoves();
 
+  Register ArgBaseReg;
+  if (auto *MI = X86FI->getStackPtrSaveMI()) {
+    unsigned Opc = X86::LEA32r;
+    Register StackReg = X86::ESP;
+    ArgBaseReg = MI->getOperand(0).getReg();
+    if (STI.is64Bit()) {
+      Opc = X86::LEA64r;
+      StackReg = X86::RSP;
+    }
+    // leal -4(%basereg), %esp
+    // .cfi_def_cfa %esp, 4
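+    // The argument base register still holds the CFA, so pointing the
+    // stack pointer SlotSize below it lands it on the return address copy
+    // pushed by the extra prolog.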
+    BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
+        .addUse(ArgBaseReg)
+        .addImm(1)
+        .addUse(X86::NoRegister)
+        .addImm(-(int64_t)SlotSize)
+        .addUse(X86::NoRegister)
+        .setMIFlag(MachineInstr::FrameDestroy);
+    if (NeedsDwarfCFI) {
+      unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
+      BuildCFI(MBB, MBBI, DL,
+               MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
+               MachineInstr::FrameDestroy);
+      --MBBI;
+    }
+    --MBBI;
+    // The stack pointer saving instruction is not needed any more.
+    MI->eraseFromParent();
+    X86FI->setStackPtrSaveMI(nullptr);
+  }
+
   if (IsFunclet) {
     assert(HasFP && "EH funclets without FP not yet implemented");
     NumBytes = getWinEHFuncletFrameSize(MF);
@@ -2237,11 +2357,13 @@
     }
 
     if (NeedsDwarfCFI) {
-      unsigned DwarfStackPtr =
-          TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
-      BuildCFI(MBB, MBBI, DL,
-               MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
-               MachineInstr::FrameDestroy);
+      if (!ArgBaseReg.isValid()) {
+        unsigned DwarfStackPtr =
+            TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
+                 MachineInstr::FrameDestroy);
+      }
       if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
         unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
         BuildCFI(MBB, AfterPop, DL,
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -117,6 +117,8 @@
   /// determine if we should insert tilerelease in frame lowering.
   bool HasVirtualTileReg = false;
 
+  MachineInstr *StackPtrSaveMI = nullptr;
+
   std::optional<int> SwiftAsyncContextFrameIdx;
 
   // Preallocated fields are only used during isel.
@@ -225,6 +227,9 @@
   bool hasVirtualTileReg() const { return HasVirtualTileReg; }
   void setHasVirtualTileReg(bool v) { HasVirtualTileReg = v; }
 
+  void setStackPtrSaveMI(MachineInstr *MI) { StackPtrSaveMI = MI; }
+  MachineInstr *getStackPtrSaveMI() const { return StackPtrSaveMI; }
+
   std::optional<int> getSwiftAsyncContextFrameIdx() const {
     return SwiftAsyncContextFrameIdx;
   }
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -702,6 +702,11 @@
 bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
   const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
 
+  // We have a virtual register to reference the arguments, so a base
+  // pointer is not needed.
+  if (X86FI->getStackPtrSaveMI() != nullptr)
+    return false;
+
   if (X86FI->hasPreallocatedCall())
     return true;
 
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -433,6 +433,23 @@
 (add RAX, RCX, RDX, RSI, RDI, R8, R9, RBX, R14, R15, R12, R13, RBP)>;
 
+// It includes the intersection of the win64 and linux64 registers that are
+// not used to pass/return arguments. R10 and R11 are scratch registers.
+def GR64_NoArg: RegisterClass<"X86", [i64], 64, (add R10, R11, RBX,
+                                                     R12, R13, R14, R15)> {
+  let GeneratePressureSet = 0;
+}
+// It includes the linux64 registers that are not used to pass/return
+// arguments in regcall.
+def GR64_RC_NoArg: RegisterClass<"X86", [i64], 64, (add RBX, R10)> {
+  let GeneratePressureSet = 0;
+}
+// It includes the linux32 registers that are not used to pass/return
+// arguments in regcall.
+def GR32_RC_NoArg: RegisterClass<"X86", [i32], 32, (add EBX)> {
+  let GeneratePressureSet = 0;
+}
+
 // Segment registers for use by MOV instructions (and others) that have a
 // segment register as one operand.  Always contain a 16-bit segment
 // descriptor.
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -104,6 +104,7 @@
   initializePseudoProbeInserterPass(PR);
   initializeX86ReturnThunksPass(PR);
   initializeX86DAGToDAGISelPass(PR);
+  initializeX86ArgumentStackSlotPassPass(PR);
 }
 
 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -518,6 +519,7 @@
 }
 
 void X86PassConfig::addPreRegAlloc() {
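+  // Run before register allocation so the virtual base register created
+  // for argument stack slot references gets allocated.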
+  addPass(createX86ArgumentStackSlotPass());
   if (getOptLevel() != CodeGenOpt::None) {
     addPass(&LiveRangeShrinkID);
     addPass(createX86FixupSetCC());
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -41,6 +41,7 @@
 ; CHECK-NEXT:       X86 PIC Global Base Reg Initialization
 ; CHECK-NEXT:       Finalize ISel and expand pseudo-instructions
 ; CHECK-NEXT:       Local Stack Slot Allocation
+; CHECK-NEXT:       Argument Stack Rebase
 ; CHECK-NEXT:       X86 speculative load hardening
 ; CHECK-NEXT:       MachineDominator Tree Construction
 ; CHECK-NEXT:       X86 EFLAGS copy lowering
diff --git a/llvm/test/CodeGen/X86/i386-baseptr.ll b/llvm/test/CodeGen/X86/i386-baseptr.ll
--- a/llvm/test/CodeGen/X86/i386-baseptr.ll
+++ b/llvm/test/CodeGen/X86/i386-baseptr.ll
@@ -39,16 +39,18 @@
 define void @clobber_base() #0 {
 ; CHECK-LABEL: clobber_base:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT:    .cfi_def_cfa %ebx, 0
+; CHECK-NEXT:    andl $-128, %esp
+; CHECK-NEXT:    pushl -4(%ebx)
 ; CHECK-NEXT:    pushl %ebp
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    .cfi_offset %ebp, -8
 ; CHECK-NEXT:    movl %esp, %ebp
-; CHECK-NEXT:    .cfi_def_cfa_register %ebp
+; CHECK-NEXT:    .cfi_escape 0x10, 0x05, 0x02, 0x75, 0x00 #
+; CHECK-NEXT:    pushl %ebx
 ; CHECK-NEXT:    pushl %esi
-; CHECK-NEXT:    andl $-128, %esp
-; CHECK-NEXT:    subl $128, %esp
-; CHECK-NEXT:    movl %esp, %esi
-; CHECK-NEXT:    .cfi_offset %esi, -12
+; CHECK-NEXT:    subl $112, %esp
+; CHECK-NEXT:    .cfi_escape 0x10, 0x06, 0x02, 0x75, 0x78 #
+; CHECK-NEXT:    .cfi_escape 0x10, 0x03, 0x02, 0x75, 0x7c #
 ; CHECK-NEXT:    calll helper@PLT
 ; CHECK-NEXT:    movl %esp, %ecx
 ; CHECK-NEXT:    leal 31(,%eax,4), %eax
@@ -63,12 +65,14 @@
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    movl $8, %edx
 ; CHECK-NEXT:    #APP
-; CHECK-NEXT:    movl %edx, (%esi)
+; CHECK-NEXT:    movl %edx, -120(%ebp)
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    movl $0, (%ecx,%eax)
-; CHECK-NEXT:    leal -4(%ebp), %esp
+; CHECK-NEXT:    leal -8(%ebp), %esp
 ; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    popl %ebx
 ; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    leal -4(%ebx), %esp
 ; CHECK-NEXT:    .cfi_def_cfa %esp, 4
 ; CHECK-NEXT:    retl
 entry:
@@ -85,17 +89,17 @@
 define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 %param3, i32 %param4, i32 %param5, i32 %param6) #0 {
 ; CHECK-LABEL: clobber_baseptr_argptr:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT:    .cfi_def_cfa %ebx, 0
+; CHECK-NEXT:    andl $-128, %esp
+; CHECK-NEXT:    pushl -4(%ebx)
 ; CHECK-NEXT:    pushl %ebp
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    .cfi_offset %ebp, -8
 ; CHECK-NEXT:    movl %esp, %ebp
-; CHECK-NEXT:    .cfi_def_cfa_register %ebp
+; CHECK-NEXT:    .cfi_escape 0x10, 0x05, 0x02, 0x75, 0x00 #
 ; CHECK-NEXT:    pushl %ebx
-; CHECK-NEXT:    andl $-128, %esp
-; CHECK-NEXT:    subl $128, %esp
-; CHECK-NEXT:    movl %esp, %esi
-; CHECK-NEXT:    .cfi_offset %ebx, -12
-; CHECK-NEXT:    movl 8(%ebp), %edi
+; CHECK-NEXT:    subl $116, %esp
+; CHECK-NEXT:    .cfi_escape 0x10, 0x03, 0x02, 0x75, 0x7c #
+; CHECK-NEXT:    movl (%ebx), %edi
 ; CHECK-NEXT:    calll helper@PLT
 ; CHECK-NEXT:    movl %esp, %ecx
 ; CHECK-NEXT:    leal 31(,%eax,4), %eax
@@ -114,12 +118,13 @@
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    movl $8, %edx
 ; CHECK-NEXT:    #APP
-; CHECK-NEXT:    movl %edx, (%esi)
+; CHECK-NEXT:    movl %edx, -120(%ebp)
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    movl %edi, (%ecx,%eax)
 ; CHECK-NEXT:    leal -4(%ebp), %esp
 ; CHECK-NEXT:    popl %ebx
 ; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    leal -4(%ebx), %esp
 ; CHECK-NEXT:    .cfi_def_cfa %esp, 4
 ; CHECK-NEXT:    retl
 entry:
@@ -135,6 +140,6 @@
   ret void
 }
 
-attributes #0 = { "frame-pointer"="all"}
+attributes #0 = {"frame-pointer"="all"}
 !llvm.module.flags = !{!0}
 !0 = !{i32 2, !"override-stack-alignment", i32 32}
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -116,6 +116,7 @@
 ; CHECK-NEXT:       Machine code sinking
 ; CHECK-NEXT:       Peephole Optimizations
 ; CHECK-NEXT:       Remove dead machine instructions
+; CHECK-NEXT:       Argument Stack Rebase
 ; CHECK-NEXT:       Live Range Shrink
 ; CHECK-NEXT:       X86 Fixup SetCC
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
diff --git a/llvm/test/CodeGen/X86/swifttail-realign.ll b/llvm/test/CodeGen/X86/swifttail-realign.ll
--- a/llvm/test/CodeGen/X86/swifttail-realign.ll
+++ b/llvm/test/CodeGen/X86/swifttail-realign.ll
@@ -6,11 +6,11 @@
 
 define swifttailcc void @caller(i64 %n) {
 ; CHECK-LABEL: caller:
+; CHECK: andq $-32, %rsp
 ; CHECK: subq $16, %rsp
 ; CHECK: pushq %rbp
 ; CHECK: movq %rsp, %rbp
 ; CHECK: pushq %rbx
-; CHECK: andq $-32, %rsp
 ; [... don't really care what happens to rsp to allocate %ptr ...]
 ; CHECK: movq 24(%rbp), [[RETADDR:%.*]]
 ; CHECK: movq [[RETADDR]], 8(%rbp)
diff --git a/llvm/test/CodeGen/X86/x86-64-baseptr.ll b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
--- a/llvm/test/CodeGen/X86/x86-64-baseptr.ll
+++ b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
@@ -77,16 +77,16 @@
 define void @clobber_base() #0 {
 ; CHECK-LABEL: clobber_base:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %r10
+; CHECK-NEXT:    .cfi_def_cfa %r10, 0
+; CHECK-NEXT:    andq $-128, %rsp
+; CHECK-NEXT:    pushq -8(%r10)
 ; CHECK-NEXT:    pushq %rbp
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset %rbp, -16
 ; CHECK-NEXT:    movq %rsp, %rbp
-; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 #
 ; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    andq $-128, %rsp
-; CHECK-NEXT:    subq $128, %rsp
-; CHECK-NEXT:    movq %rsp, %rbx
-; CHECK-NEXT:    .cfi_offset %rbx, -24
+; CHECK-NEXT:    subq $104, %rsp
+; CHECK-NEXT:    .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 #
 ; CHECK-NEXT:    callq helper@PLT
 ; CHECK-NEXT:    movq %rsp, %rcx
 ; CHECK-NEXT:    movl %eax, %eax
@@ -102,27 +102,28 @@
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    movl $8, %edx
 ; CHECK-NEXT:    #APP
-; CHECK-NEXT:    movl %edx, (%rbx)
+; CHECK-NEXT:    movl %edx, -112(%rbp)
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    movl $0, (%rcx,%rax)
 ; CHECK-NEXT:    leaq -8(%rbp), %rsp
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    leaq -8(%r10), %rsp
 ; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
 ; CHECK-NEXT:    retq
 ;
 ; X32ABI-LABEL: clobber_base:
 ; X32ABI:       # %bb.0: # %entry
+; X32ABI-NEXT:    leaq {{[0-9]+}}(%rsp), %r10
+; X32ABI-NEXT:    .cfi_def_cfa %r10, 0
+; X32ABI-NEXT:    andl $-128, %esp
+; X32ABI-NEXT:    pushq -8(%r10)
 ; X32ABI-NEXT:    pushq %rbp
-; X32ABI-NEXT:    .cfi_def_cfa_offset 16
-; X32ABI-NEXT:    .cfi_offset %rbp, -16
 ; X32ABI-NEXT:    movl %esp, %ebp
-; X32ABI-NEXT:    .cfi_def_cfa_register %rbp
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 #
 ; X32ABI-NEXT:    pushq %rbx
-; X32ABI-NEXT:    andl $-128, %esp
-; X32ABI-NEXT:    subl $128, %esp
-; X32ABI-NEXT:    movl %esp, %ebx
-; X32ABI-NEXT:    .cfi_offset %rbx, -24
+; X32ABI-NEXT:    subl $104, %esp
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 #
 ; X32ABI-NEXT:    callq helper@PLT
 ; X32ABI-NEXT:    # kill: def $eax killed $eax def $rax
 ; X32ABI-NEXT:    leal 31(,%rax,4), %eax
@@ -138,12 +139,13 @@
 ; X32ABI-NEXT:    #NO_APP
 ; X32ABI-NEXT:    movl $8, %edx
 ; X32ABI-NEXT:    #APP
-; X32ABI-NEXT:    movl %edx, (%ebx)
+; X32ABI-NEXT:    movl %edx, -112(%ebp)
 ; X32ABI-NEXT:    #NO_APP
 ; X32ABI-NEXT:    movl $0, (%ecx,%eax)
 ; X32ABI-NEXT:    leal -8(%ebp), %esp
 ; X32ABI-NEXT:    popq %rbx
 ; X32ABI-NEXT:    popq %rbp
+; X32ABI-NEXT:    leaq -8(%r10), %rsp
 ; X32ABI-NEXT:    .cfi_def_cfa %rsp, 8
 ; X32ABI-NEXT:    retq
 entry:
@@ -160,14 +162,15 @@
 define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 %param3, i32 %param4, i32 %param5, i32 %param6, i32 %param7, i32 %param8, i32 %param9, i32 %param10, i32 %param11, i32 %param12) #0 {
 ; CHECK-LABEL: clobber_baseptr_argptr:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %r10
+; CHECK-NEXT:    .cfi_def_cfa %r10, 0
+; CHECK-NEXT:    andq $-128, %rsp
+; CHECK-NEXT:    pushq -8(%r10)
 ; CHECK-NEXT:    pushq %rbp
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset %rbp, -16
 ; CHECK-NEXT:    movq %rsp, %rbp
-; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 #
 ; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    andq $-128, %rsp
-; CHECK-NEXT:    subq $256, %rsp # imm = 0x100
+; CHECK-NEXT:    subq $232, %rsp
 ; CHECK-NEXT:    movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-NEXT:    movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-NEXT:    movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -176,17 +179,16 @@
 ; CHECK-NEXT:    movaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-NEXT:    movaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-NEXT:    movaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    movq %rsp, %rbx
-; CHECK-NEXT:    .cfi_offset %rbx, -24
-; CHECK-NEXT:    .cfi_offset %xmm8, -160
-; CHECK-NEXT:    .cfi_offset %xmm9, -144
-; CHECK-NEXT:    .cfi_offset %xmm10, -128
-; CHECK-NEXT:    .cfi_offset %xmm11, -112
-; CHECK-NEXT:    .cfi_offset %xmm12, -96
-; CHECK-NEXT:    .cfi_offset %xmm13, -80
-; CHECK-NEXT:    .cfi_offset %xmm14, -64
-; CHECK-NEXT:    .cfi_offset %xmm15, -48
-; CHECK-NEXT:    movl 16(%rbp), %r14d
+; CHECK-NEXT:    .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 #
+; CHECK-NEXT:    .cfi_escape 0x10, 0x19, 0x02, 0x76, 0xf0, 0x7e #
+; CHECK-NEXT:    .cfi_escape 0x10, 0x1a, 0x02, 0x76, 0x80, 0x7f #
+; CHECK-NEXT:    .cfi_escape 0x10, 0x1b, 0x02, 0x76, 0x90, 0x7f #
+; CHECK-NEXT:    .cfi_escape 0x10, 0x1c, 0x02, 0x76, 0xa0, 0x7f #
+; CHECK-NEXT:    .cfi_escape 0x10, 0x1d, 0x02, 0x76, 0xb0, 0x7f #
+; CHECK-NEXT:    .cfi_escape 0x10, 0x1e, 0x02, 0x76, 0x40 #
+; CHECK-NEXT:    .cfi_escape 0x10, 0x1f, 0x02, 0x76, 0x50 #
+; CHECK-NEXT:    .cfi_escape 0x10, 0x20, 0x02, 0x76, 0x60 #
+; CHECK-NEXT:    movl (%r10), %r14d
 ; CHECK-NEXT:    callq helper@PLT
 ; CHECK-NEXT:    movq %rsp, %rcx
 ; CHECK-NEXT:    movl %eax, %eax
@@ -205,7 +207,7 @@
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    movl $8, %edx
 ; CHECK-NEXT:    #APP
-; CHECK-NEXT:    movl %edx, (%rbx)
+; CHECK-NEXT:    movl %edx, -240(%rbp)
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    movl %r14d, (%rcx,%rax)
 ; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
@@ -219,19 +221,21 @@
 ; CHECK-NEXT:    leaq -8(%rbp), %rsp
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    leaq -8(%r10), %rsp
 ; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
 ; CHECK-NEXT:    retq
 ;
 ; X32ABI-LABEL: clobber_baseptr_argptr:
 ; X32ABI:       # %bb.0: # %entry
+; X32ABI-NEXT:    leaq {{[0-9]+}}(%rsp), %r10
+; X32ABI-NEXT:    .cfi_def_cfa %r10, 0
+; X32ABI-NEXT:    andl $-128, %esp
+; X32ABI-NEXT:    pushq -8(%r10)
 ; X32ABI-NEXT:    pushq %rbp
-; X32ABI-NEXT:    .cfi_def_cfa_offset 16
-; X32ABI-NEXT:    .cfi_offset %rbp, -16
 ; X32ABI-NEXT:    movl %esp, %ebp
-; X32ABI-NEXT:    .cfi_def_cfa_register %rbp
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 #
 ; X32ABI-NEXT:    pushq %rbx
-; X32ABI-NEXT:    andl $-128, %esp
-; X32ABI-NEXT:    subl $256, %esp # imm = 0x100
+; X32ABI-NEXT:    subl $232, %esp
 ; X32ABI-NEXT:    movaps %xmm15, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; X32ABI-NEXT:    movaps %xmm14, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; X32ABI-NEXT:    movaps %xmm13, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
@@ -240,17 +244,16 @@
 ; X32ABI-NEXT:    movaps %xmm10, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; X32ABI-NEXT:    movaps %xmm9, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; X32ABI-NEXT:    movaps %xmm8, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; X32ABI-NEXT:    movl %esp, %ebx
-; X32ABI-NEXT:    .cfi_offset %rbx, -24
-; X32ABI-NEXT:    .cfi_offset %xmm8, -160
-; X32ABI-NEXT:    .cfi_offset %xmm9, -144
-; X32ABI-NEXT:    .cfi_offset %xmm10, -128
-; X32ABI-NEXT:    .cfi_offset %xmm11, -112
-; X32ABI-NEXT:    .cfi_offset %xmm12, -96
-; X32ABI-NEXT:    .cfi_offset %xmm13, -80
-; X32ABI-NEXT:    .cfi_offset %xmm14, -64
-; X32ABI-NEXT:    .cfi_offset %xmm15, -48
-; X32ABI-NEXT:    movl 16(%ebp), %r14d
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 #
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x19, 0x02, 0x76, 0xf0, 0x7e #
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x1a, 0x02, 0x76, 0x80, 0x7f #
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x1b, 0x02, 0x76, 0x90, 0x7f #
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x1c, 0x02, 0x76, 0xa0, 0x7f #
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x1d, 0x02, 0x76, 0xb0, 0x7f #
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x1e, 0x02, 0x76, 0x40 #
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x1f, 0x02, 0x76, 0x50 #
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x20, 0x02, 0x76, 0x60 #
+; X32ABI-NEXT:    movl (%r10), %r14d
 ; X32ABI-NEXT:    callq helper@PLT
 ; X32ABI-NEXT:    # kill: def $eax killed $eax def $rax
 ; X32ABI-NEXT:    leal 31(,%rax,4), %eax
@@ -269,7 +272,7 @@
 ; X32ABI-NEXT:    #NO_APP
 ; X32ABI-NEXT:    movl $8, %edx
 ; X32ABI-NEXT:    #APP
-; X32ABI-NEXT:    movl %edx, (%ebx)
+; X32ABI-NEXT:    movl %edx, -240(%ebp)
 ; X32ABI-NEXT:    #NO_APP
 ; X32ABI-NEXT:    movl %r14d, (%ecx,%eax)
 ; X32ABI-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm8 # 16-byte Reload
@@ -283,6 +286,7 @@
 ; X32ABI-NEXT:    leal -8(%ebp), %esp
 ; X32ABI-NEXT:    popq %rbx
 ; X32ABI-NEXT:    popq %rbp
+; X32ABI-NEXT:    leaq -8(%r10), %rsp
 ; X32ABI-NEXT:    .cfi_def_cfa %rsp, 8
 ; X32ABI-NEXT:    retq
 entry:
@@ -298,6 +302,6 @@
   ret void
 }
 
-attributes #0 = { "frame-pointer"="all"}
+attributes #0 = {"frame-pointer"="all"}
 !llvm.module.flags = !{!0}
 !0 = !{i32 2, !"override-stack-alignment", i32 32}