diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -26,6 +26,7 @@
 add_public_tablegen_target(X86CommonTableGen)
 
 set(sources
+  X86ArgumentStackSlotRebase.cpp
   X86AsmPrinter.cpp
   X86AvoidTrailingCall.cpp
   X86CallFrameOptimization.cpp
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -166,11 +166,13 @@
 FunctionPass *createX86LoadValueInjectionRetHardeningPass();
 FunctionPass *createX86SpeculativeLoadHardeningPass();
 FunctionPass *createX86SpeculativeExecutionSideEffectSuppression();
+FunctionPass *createX86ArgumentStackSlotPass();
 
 void initializeEvexToVexInstPassPass(PassRegistry &);
 void initializeFPSPass(PassRegistry &);
 void initializeFixupBWInstPassPass(PassRegistry &);
 void initializeFixupLEAPassPass(PassRegistry &);
+void initializeX86ArgumentStackSlotPassPass(PassRegistry &);
 void initializeX86FixupInstTuningPassPass(PassRegistry &);
 void initializeWinEHStatePassPass(PassRegistry &);
 void initializeX86AvoidSFBPassPass(PassRegistry &);
diff --git a/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp b/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp
@@ -0,0 +1,195 @@
+//===---- X86ArgumentStackSlotRebase.cpp - rebase argument stack slot -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces the frame register with a GPR virtual register and sets
+// the stack offset for each instruction that references an argument on the
+// stack.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86argumentstackrebase"
+
+namespace {
+
+class X86ArgumentStackSlotPass : public MachineFunctionPass {
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  explicit X86ArgumentStackSlotPass() : MachineFunctionPass(ID) {
+    initializeX86ArgumentStackSlotPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+} // end anonymous namespace
+
+char X86ArgumentStackSlotPass::ID = 0;
+
+INITIALIZE_PASS(X86ArgumentStackSlotPass, DEBUG_TYPE, "Argument Stack Rebase",
+                false, false)
+
+FunctionPass *llvm::createX86ArgumentStackSlotPass() {
+  return new X86ArgumentStackSlotPass();
+}
+
+static Register getArgBaseReg(MachineFunction &MF) {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  const Function &F = MF.getFunction();
+  CallingConv::ID CC = F.getCallingConv();
+  Register NoReg;
+  const TargetRegisterClass *RC = nullptr;
+  switch (CC) {
+  // We need a virtual register in case inline assembly clobbers the
+  // argument base register.
+  case CallingConv::C:
+    RC = STI.is64Bit() ? &X86::GR64_ArgRefRegClass : &X86::GR32_ArgRefRegClass;
+    break;
+  case CallingConv::X86_RegCall:
+    // FIXME: For regcall there is no scratch register on 32-bit targets.
+    // We may use a callee-saved register as the argument base register and
+    // save it before it is changed into the base pointer. We need DW_CFA to
+    // indicate where the callee-saved register is saved, so that it can
+    // be correctly unwound.
+    // push ebx
+    // mov ebx, esp
+    // and esp, -128
+    // ...
+    // pop ebx
+    // ret
+    RC = STI.is64Bit() ? &X86::GR64_ArgRefRegClass : nullptr;
+    break;
+  // TODO: Refine the register class for each calling convention.
+  default:
+    break;
+  }
+  if (RC)
+    return MRI.createVirtualRegister(RC);
+  else
+    return NoReg;
+}
+
+bool X86ArgumentStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
+  const Function &F = MF.getFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  const X86RegisterInfo *TRI = STI.getRegisterInfo();
+  const X86InstrInfo *TII = STI.getInstrInfo();
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  bool Changed = false;
+
+  if (F.hasFnAttribute(Attribute::Naked))
+    return false;
+  // Only support Linux.
+  if (!STI.isTargetLinux())
+    return false;
+  if (!TRI->hasBasePointer(MF))
+    return false;
+
+  Register BasePtr = TRI->getBaseRegister();
+  auto IsBaseRegisterClobbered = [&]() {
+    for (MachineBasicBlock &MBB : MF) {
+      for (MachineInstr &MI : MBB) {
+        if (!MI.isInlineAsm())
+          continue;
+        for (MachineOperand &MO : MI.operands()) {
+          if (!MO.isReg())
+            continue;
+          Register Reg = MO.getReg();
+          if (!Register::isPhysicalRegister(Reg))
+            continue;
+          if (TRI->isSuperOrSubRegisterEq(BasePtr, Reg))
+            return true;
+        }
+      }
+    }
+    return false;
+  };
+  if (!IsBaseRegisterClobbered())
+    return false;
+
+  Register ArgBaseReg = getArgBaseReg(MF);
+  if (!ArgBaseReg.isValid())
+    return false;
+  // leal 4(%esp), %reg
+  // FIXME: will the instruction be duplicated or eliminated? Should we
+  // define a pseudo instruction for it?
+  MachineBasicBlock &MBB = MF.front();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  DebugLoc DL;
+  // Emit an instruction to copy the stack pointer to a virtual register
+  // and save the instruction to the x86 machine function info. We can get
+  // the physical register of ArgBaseReg after register allocation. The
+  // stack slot is used to save/restore the argument base pointer. We can
+  // get the index from the instruction.
+  unsigned SlotSize = TRI->getSlotSize();
+  int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
+  MachineInstr *LEA =
+      BuildMI(MBB, MBBI, DL,
+              TII->get(STI.is64Bit() ? X86::LEA64r : X86::LEA32r), ArgBaseReg)
+          .addFrameIndex(FI)
+          .addImm(1)
+          .addUse(X86::NoRegister)
+          .addImm(SlotSize)
+          .addUse(X86::NoRegister)
+          .setMIFlag(MachineInstr::FrameSetup);
+  X86FI->setStackPtrSaveMI(LEA);
+
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      int I = 0;
+      for (MachineOperand &MO : MI.operands()) {
+        if (MO.isFI()) {
+          int Idx = MO.getIndex();
+          if (!MFI.isFixedObjectIndex(Idx))
+            continue;
+          int64_t Offset = MFI.getObjectOffset(Idx);
+          if (Offset < 0)
+            continue;
+          // TODO: replace the register for debug instructions.
+          if (MI.isDebugInstr())
+            continue;
+          // Replace frame register with argument base pointer and its offset.
+          TRI->eliminateFrameIndex(MI.getIterator(), I, ArgBaseReg, Offset);
+          Changed = true;
+        }
+        ++I;
+      }
+    }
+  }
+
+  return Changed;
+}
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -33,6 +33,7 @@
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Target/TargetOptions.h"
 #include <cstdlib>
 
@@ -476,6 +477,7 @@
   MachineFrameInfo &MFI = MF.getFrameInfo();
   MachineModuleInfo &MMI = MF.getMMI();
   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
 
   // Add callee saved registers to move list.
   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
@@ -487,13 +489,62 @@
     unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
     if (IsPrologue) {
-      BuildCFI(MBB, MBBI, DL,
-               MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+      if (X86FI->getStackPtrSaveMI()) {
+        // +2*SlotSize because the return address and ebp are at the bottom
+        // of the stack.
+        // | retaddr |
+        // |   ebp   |
+        // |         |<--ebp
+        Offset += 2 * SlotSize;
+        SmallString<64> CfaExpr;
+        CfaExpr.push_back(dwarf::DW_CFA_expression);
+        uint8_t buffer[16];
+        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
+        CfaExpr.push_back(2);
+        Register FramePtr = TRI->getFrameRegister(MF);
+        const Register MachineFramePtr =
+            STI.isTarget64BitILP32()
+                ? Register(getX86SubSuperRegister(FramePtr, 64))
+                : FramePtr;
+        unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
+        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
+        CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
+                 MachineInstr::FrameSetup);
+      } else {
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+      }
     } else {
       BuildCFI(MBB, MBBI, DL,
                MCCFIInstruction::createRestore(nullptr, DwarfReg));
     }
   }
+  if (auto *MI = X86FI->getStackPtrSaveMI()) {
+    int FI = MI->getOperand(1).getIndex();
+    int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
+    SmallString<64> CfaExpr;
+    Register FramePtr = TRI->getFrameRegister(MF);
+    const Register MachineFramePtr =
+        STI.isTarget64BitILP32()
+            ? Register(getX86SubSuperRegister(FramePtr, 64))
+            : FramePtr;
+    unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
+    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
+    uint8_t buffer[16];
+    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
+    CfaExpr.push_back(dwarf::DW_OP_deref);
+
+    SmallString<64> DefCfaExpr;
+    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
+    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
+    DefCfaExpr.append(CfaExpr.str());
+    // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
+    BuildCFI(MBB, MBBI, DL,
+             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
+             MachineInstr::FrameSetup);
+  }
 }
 
 void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
@@ -1509,6 +1560,42 @@
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
   DebugLoc DL;
+  Register ArgBaseReg;
+
+  // Emit an extra prolog for the argument stack slot reference.
+  if (auto *MI = X86FI->getStackPtrSaveMI()) {
+    // MI is the LEA instruction created in X86ArgumentStackSlotPass.
+    // Create an extra prolog for stack realignment.
+    ArgBaseReg = MI->getOperand(0).getReg();
+    // leal 4(%esp), %basereg
+    // .cfi_def_cfa %basereg, 0
+    // andl $-128, %esp
+    // pushl -4(%basereg)
+    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
+            ArgBaseReg)
+        .addUse(StackPtr)
+        .addImm(1)
+        .addUse(X86::NoRegister)
+        .addImm(SlotSize)
+        .addUse(X86::NoRegister)
+        .setMIFlag(MachineInstr::FrameSetup);
+    if (NeedsDwarfCFI) {
+      // .cfi_def_cfa %basereg, 0
+      unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
+      BuildCFI(MBB, MBBI, DL,
+               MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
+               MachineInstr::FrameSetup);
+    }
+    BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
+    int64_t Offset = Is64Bit ? -2 * (int64_t)SlotSize : -1 * (int64_t)SlotSize;
+    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
+        .addReg(ArgBaseReg)
+        .addImm(1)
+        .addReg(X86::NoRegister)
+        .addImm(Offset)
+        .addReg(X86::NoRegister)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
 
   // Space reserved for stack-based arguments when making a (ABI-guaranteed)
   // tail call.
@@ -1640,7 +1727,7 @@
         .addReg(MachineFramePtr, RegState::Kill)
         .setMIFlag(MachineInstr::FrameSetup);
 
-    if (NeedsDwarfCFI) {
+    if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
       // Mark the place where EBP/RBP was saved.
       // Define the current CFA rule to use the provided offset.
       assert(StackSize);
@@ -1717,13 +1804,28 @@
           .setMIFlag(MachineInstr::FrameSetup);
 
     if (NeedsDwarfCFI) {
-      // Mark effective beginning of when frame pointer becomes valid.
-      // Define the current CFA to use the EBP/RBP register.
-      unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
-      BuildCFI(
-          MBB, MBBI, DL,
-          MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
-          MachineInstr::FrameSetup);
+      if (ArgBaseReg.isValid()) {
+        SmallString<64> CfaExpr;
+        CfaExpr.push_back(dwarf::DW_CFA_expression);
+        uint8_t buffer[16];
+        unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
+        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
+        CfaExpr.push_back(2);
+        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
+        CfaExpr.push_back(0);
+        // DW_CFA_expression: reg5 DW_OP_breg5 +0
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
+                 MachineInstr::FrameSetup);
+      } else {
+        // Mark effective beginning of when frame pointer becomes valid.
+        // Define the current CFA to use the EBP/RBP register.
+        unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
+        BuildCFI(
+            MBB, MBBI, DL,
+            MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
+            MachineInstr::FrameSetup);
+      }
     }
 
     if (NeedsWinFPO) {
@@ -1790,7 +1892,8 @@
   // Realign stack after we pushed callee-saved registers (so that we'll be
   // able to calculate their offsets from the frame pointer).
   // Don't do this for Win64, it needs to realign the stack after the prologue.
-  if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) {
+  if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
+      !ArgBaseReg.isValid()) {
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
     BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
 
@@ -2048,6 +2151,16 @@
           .setMIFlag(MachineInstr::FrameSetup);
     }
   }
+  if (ArgBaseReg.isValid()) {
+    // Save the argument base pointer.
+    auto *MI = X86FI->getStackPtrSaveMI();
+    int FI = MI->getOperand(1).getIndex();
+    unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
+    // movl %basereg, offset(%ebp)
+    addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
+        .addReg(ArgBaseReg)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
 
   if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
     // Mark end of stack pointer adjustment.
@@ -2196,6 +2309,34 @@
                            !MF.getTarget().getTargetTriple().isOSWindows()) &&
                           MF.needsFrameMoves();
 
+  Register ArgBaseReg;
+  if (auto *MI = X86FI->getStackPtrSaveMI()) {
+    unsigned Opc = X86::LEA32r;
+    Register StackReg = X86::ESP;
+    ArgBaseReg = MI->getOperand(0).getReg();
+    if (STI.is64Bit()) {
+      Opc = X86::LEA64r;
+      StackReg = X86::RSP;
+    }
+    // leal -8(%basereg), %esp
+    // .cfi_def_cfa %esp, 4
+    BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
+        .addUse(ArgBaseReg)
+        .addImm(1)
+        .addUse(X86::NoRegister)
+        .addImm((int64_t)SlotSize * -2)
+        .addUse(X86::NoRegister)
+        .setMIFlag(MachineInstr::FrameDestroy);
+    if (NeedsDwarfCFI) {
+      unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
+      BuildCFI(MBB, MBBI, DL,
+               MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
+               MachineInstr::FrameDestroy);
+      --MBBI;
+    }
+    --MBBI;
+  }
+
   if (IsFunclet) {
     assert(HasFP && "EH funclets without FP not yet implemented");
     NumBytes = getWinEHFuncletFrameSize(MF);
@@ -2237,11 +2378,13 @@
     }
 
     if (NeedsDwarfCFI) {
-      unsigned DwarfStackPtr =
-          TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
-      BuildCFI(MBB, MBBI, DL,
-               MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
-               MachineInstr::FrameDestroy);
+      if (!ArgBaseReg.isValid()) {
+        unsigned DwarfStackPtr =
+            TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
+                 MachineInstr::FrameDestroy);
+      }
       if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
         unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
         BuildCFI(MBB, AfterPop, DL,
@@ -2271,6 +2414,15 @@
       --MBBI;
   }
 
+  if (ArgBaseReg.isValid()) {
+    // Restore the argument base pointer.
+    auto *MI = X86FI->getStackPtrSaveMI();
+    int FI = MI->getOperand(1).getIndex();
+    unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
+    // movl offset(%ebp), %basereg
+    addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
+        .setMIFlag(MachineInstr::FrameDestroy);
+  }
   MBBI = FirstCSPop;
 
   if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
@@ -3889,8 +4041,16 @@
 
 void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
     MachineFunction &MF, RegScavenger *RS) const {
+  auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+
   if (STI.is32Bit() && MF.hasEHFunclets())
     restoreWinEHStackPointersInParent(MF);
+  // We have emitted the prolog and epilog, so the stack pointer saving
+  // instruction is no longer needed.
+  if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
+    MI->eraseFromParent();
+    X86FI->setStackPtrSaveMI(nullptr);
+  }
 }
 
 void X86FrameLowering::restoreWinEHStackPointersInParent(
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -117,6 +117,8 @@
   /// determine if we should insert tilerelease in frame lowering.
   bool HasVirtualTileReg = false;
 
+  MachineInstr *StackPtrSaveMI = nullptr;
+
   std::optional<int> SwiftAsyncContextFrameIdx;
 
   // Preallocated fields are only used during isel.
@@ -225,6 +227,9 @@
   bool hasVirtualTileReg() const { return HasVirtualTileReg; }
   void setHasVirtualTileReg(bool v) { HasVirtualTileReg = v; }
 
+  void setStackPtrSaveMI(MachineInstr *MI) { StackPtrSaveMI = MI; }
+  MachineInstr *getStackPtrSaveMI() const { return StackPtrSaveMI; }
+
   std::optional<int> getSwiftAsyncContextFrameIdx() const {
     return SwiftAsyncContextFrameIdx;
   }
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -133,6 +133,10 @@
 
   bool canRealignStack(const MachineFunction &MF) const override;
 
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           unsigned FIOperandNum, Register BaseReg,
+                           int FIOffset) const;
+
   bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -702,6 +702,11 @@
 bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
   const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
 
+  // We have a virtual register to reference arguments, so we don't need a
+  // base pointer.
+  if (X86FI->getStackPtrSaveMI() != nullptr)
+    return false;
+
   if (X86FI->hasPreallocatedCall())
     return true;
 
@@ -778,6 +783,45 @@
   llvm_unreachable("impossible");
 }
 
+void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                          unsigned FIOperandNum,
+                                          Register BaseReg,
+                                          int FIOffset) const {
+  MachineInstr &MI = *II;
+  unsigned Opc = MI.getOpcode();
+  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
+    MachineOperand &FI = MI.getOperand(FIOperandNum);
+    FI.ChangeToImmediate(FIOffset);
+    return;
+  }
+
+  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);
+
+  // The frame index format for stackmaps and patchpoints is different from the
+  // X86 format. It only has a FI and an offset.
+  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
+    assert(BasePtr == FramePtr && "Expected the FP as base register");
+    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
+    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+    return;
+  }
+
+  if (MI.getOperand(FIOperandNum + 3).isImm()) {
+    // Offset is a 32-bit integer.
+    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
+    int Offset = FIOffset + Imm;
+    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
+           "Requesting 64-bit offset in 32-bit immediate!");
+    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
+      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
+  } else {
+    // Offset is symbolic. This is extremely rare.
+    uint64_t Offset =
+        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
+    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
+  }
+}
+
 bool X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                           int SPAdj, unsigned FIOperandNum,
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -433,6 +433,18 @@
                              (add RAX, RCX, RDX, RSI, RDI, R8, R9, RBX, R14, R15, R12, R13, RBP)>;
 
+// These are the GPRs used as scratch registers by the Linux 64-bit calling
+// convention.
+def GR64_ArgRef : RegisterClass<"X86", [i64], 64, (add R10, R11)> {
+  let GeneratePressureSet = 0;
+}
+
+// These are the GPRs used as scratch registers by the Linux 32-bit calling
+// convention.
+def GR32_ArgRef : RegisterClass<"X86", [i32], 32, (add ECX, EDX)> {
+  let GeneratePressureSet = 0;
+}
+
 // Segment registers for use by MOV instructions (and others) that have a
 // segment register as one operand.  Always contain a 16-bit segment
 // descriptor.
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -104,6 +104,7 @@
   initializePseudoProbeInserterPass(PR);
   initializeX86ReturnThunksPass(PR);
   initializeX86DAGToDAGISelPass(PR);
+  initializeX86ArgumentStackSlotPassPass(PR);
 }
 
 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -518,6 +519,7 @@
 }
 
 void X86PassConfig::addPreRegAlloc() {
+  addPass(createX86ArgumentStackSlotPass());
   if (getOptLevel() != CodeGenOpt::None) {
     addPass(&LiveRangeShrinkID);
     addPass(createX86FixupSetCC());
diff --git a/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir b/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir
--- a/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir
+++ b/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir
@@ -28,8 +28,8 @@
     liveins: $rdi, $rsi
 
     ; CHECK-LABEL: name: test
-    ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4456458 /* regdef:GR64 */, def $rsi, 4456458 /* regdef:GR64 */, def dead $rdi,
-    INLINEASM &foo, 0, 4456458, def $rsi, 4456458, def dead $rdi, 2147549193, killed $rdi, 2147483657, killed $rsi, 12, implicit-def dead early-clobber $eflags
+    ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4521994 /* regdef:GR64 */, def $rsi, 4521994 /* regdef:GR64 */, def dead $rdi,
+    INLINEASM &foo, 0, 4521994, def $rsi, 4521994, def dead $rdi, 2147549193, killed $rdi, 2147483657, killed $rsi, 12, implicit-def dead early-clobber $eflags
     $rax = MOV64rr killed $rsi
     RET64 killed $rax
 ...
@@ -45,8 +45,8 @@
 
     ; Verify that the register ties are preserved.
    ; CHECK-LABEL: name: test2
-    ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4456458 /* regdef:GR64 */, def $rsi, 4456458 /* regdef:GR64 */, def dead $rdi, 2147549193 /* reguse tiedto:$1 */, killed $rdi(tied-def 5), 2147483657 /* reguse tiedto:$0 */, killed $rsi(tied-def 3), 12 /* clobber */, implicit-def dead early-clobber $eflags
-    INLINEASM &foo, 0, 4456458, def $rsi, 4456458, def dead $rdi, 2147549193, killed $rdi(tied-def 5), 2147483657, killed $rsi(tied-def 3), 12, implicit-def dead early-clobber $eflags
+    ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4521994 /* regdef:GR64 */, def $rsi, 4521994 /* regdef:GR64 */, def dead $rdi, 2147549193 /* reguse tiedto:$1 */, killed $rdi(tied-def 5), 2147483657 /* reguse tiedto:$0 */, killed $rsi(tied-def 3), 12 /* clobber */, implicit-def dead early-clobber $eflags
+    INLINEASM &foo, 0, 4521994, def $rsi, 4521994, def dead $rdi, 2147549193, killed $rdi(tied-def 5), 2147483657, killed $rsi(tied-def 3), 12, implicit-def dead early-clobber $eflags
     $rax = MOV64rr killed $rsi
     RET64 killed $rax
 ...
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -41,6 +41,7 @@
 ; CHECK-NEXT:       X86 PIC Global Base Reg Initialization
 ; CHECK-NEXT:       Finalize ISel and expand pseudo-instructions
 ; CHECK-NEXT:       Local Stack Slot Allocation
+; CHECK-NEXT:       Argument Stack Rebase
 ; CHECK-NEXT:       X86 speculative load hardening
 ; CHECK-NEXT:       MachineDominator Tree Construction
 ; CHECK-NEXT:       X86 EFLAGS copy lowering
diff --git a/llvm/test/CodeGen/X86/i386-baseptr.ll b/llvm/test/CodeGen/X86/i386-baseptr.ll
--- a/llvm/test/CodeGen/X86/i386-baseptr.ll
+++ b/llvm/test/CodeGen/X86/i386-baseptr.ll
@@ -39,16 +39,18 @@
 define void @clobber_base() #0 {
 ; CHECK-LABEL: clobber_base:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    .cfi_def_cfa %ecx, 0
+; CHECK-NEXT:    andl $-128, %esp
+; CHECK-NEXT:    pushl -4(%ecx)
 ; CHECK-NEXT:    pushl %ebp
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    .cfi_offset %ebp, -8
 ; CHECK-NEXT:    movl %esp, %ebp
-; CHECK-NEXT:    .cfi_def_cfa_register %ebp
+; CHECK-NEXT:    .cfi_escape 0x10, 0x05, 0x02, 0x75, 0x00 #
 ; CHECK-NEXT:    pushl %esi
-; CHECK-NEXT:    andl $-128, %esp
-; CHECK-NEXT:    subl $128, %esp
-; CHECK-NEXT:    movl %esp, %esi
-; CHECK-NEXT:    .cfi_offset %esi, -12
+; CHECK-NEXT:    subl $244, %esp
+; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    .cfi_escape 0x10, 0x06, 0x02, 0x75, 0x7c #
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x04, 0x75, 0x84, 0x7f, 0x06 #
 ; CHECK-NEXT:    calll helper@PLT
 ; CHECK-NEXT:    movl %esp, %ecx
 ; CHECK-NEXT:    leal 31(,%eax,4), %eax
@@ -63,12 +65,14 @@
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    movl $8, %edx
 ; CHECK-NEXT:    #APP
-; CHECK-NEXT:    movl %edx, (%esi)
+; CHECK-NEXT:    movl %edx, -120(%ebp)
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    movl $0, (%ecx,%eax)
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; CHECK-NEXT:    leal -4(%ebp), %esp
 ; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    leal -8(%ecx), %esp
 ; CHECK-NEXT:    .cfi_def_cfa %esp, 4
 ; CHECK-NEXT:    retl
 entry:
@@ -135,6 +139,6 @@
   ret void
 }
 
-attributes #0 = { "frame-pointer"="all"}
+attributes #0 = {"frame-pointer"="all"}
 !llvm.module.flags = !{!0}
 !0 = !{i32 2, !"override-stack-alignment", i32 32}
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -116,6 +116,7 @@
 ; CHECK-NEXT:       Machine code sinking
 ; CHECK-NEXT:       Peephole Optimizations
 ; CHECK-NEXT:       Remove dead machine instructions
+; CHECK-NEXT:       Argument Stack Rebase
 ; CHECK-NEXT:       Live Range Shrink
 ; CHECK-NEXT:       X86 Fixup SetCC
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
diff --git a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir
--- a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir
+++ b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir
@@ -350,7 +350,7 @@
   ; CHECK-NEXT:   CMP64rr [[NOT64r2]], [[COPY6]], implicit-def $eflags
   ; CHECK-NEXT:   undef %100.sub_32bit:gr64_with_sub_8bit = MOV32ri 0
   ; CHECK-NEXT:   [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[CMOV64rr]], %100, 4, implicit killed $eflags
-  ; CHECK-NEXT:   INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4456457 /* reguse:GR64 */, %100, 4456457 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
+  ; CHECK-NEXT:   INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4521993 /* reguse:GR64 */, %100, 4521993 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
   ; CHECK-NEXT:   LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, [[COPY5]], implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
   ; CHECK-NEXT:   $rdi = COPY [[COPY4]]
@@ -470,7 +470,7 @@
     %63:gr64 = NOT64r %63
     CMP64rr %63, %31, implicit-def $eflags
     %63:gr64 = CMOV64rr %63, %53, 4, implicit killed $eflags
-    INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4456457 /* reguse:GR64 */, %53, 4456457 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
+    INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4521993 /* reguse:GR64 */, %53, 4521993 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
     LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, %65, implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `ptr addrspace(1) undef`, addrspace 1)
     ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
     $rdi = COPY %64
diff --git a/llvm/test/CodeGen/X86/x86-64-baseptr.ll b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
--- a/llvm/test/CodeGen/X86/x86-64-baseptr.ll
+++ b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
@@ -77,16 +77,18 @@
 define void @clobber_base() #0 {
 ; CHECK-LABEL: clobber_base:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %r10
+; CHECK-NEXT:    .cfi_def_cfa %r10, 0
+; CHECK-NEXT:    andq $-128, %rsp
+; CHECK-NEXT:    pushq -16(%r10)
 ; CHECK-NEXT:    pushq %rbp
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset %rbp, -16
 ; CHECK-NEXT:    movq %rsp, %rbp
-; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 #
 ; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    andq $-128, %rsp
-; CHECK-NEXT:    subq $128, %rsp
-; CHECK-NEXT:    movq %rsp, %rbx
-; CHECK-NEXT:    .cfi_offset %rbx, -24
+; CHECK-NEXT:    subq $232, %rsp
+; CHECK-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 #
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x04, 0x76, 0x88, 0x7f, 0x06 #
 ; CHECK-NEXT:    callq helper@PLT
 ; CHECK-NEXT:    movq %rsp, %rcx
 ; CHECK-NEXT:    movl %eax, %eax
@@ -102,27 +104,31 @@
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    movl $8, %edx
 ; CHECK-NEXT:    #APP
-; CHECK-NEXT:    movl %edx, (%rbx)
+; CHECK-NEXT:    movl %edx, -112(%rbp)
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    movl $0, (%rcx,%rax)
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
 ; CHECK-NEXT:    leaq -8(%rbp), %rsp
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    leaq -16(%r10), %rsp
 ; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
 ; CHECK-NEXT:    retq
 ;
 ; X32ABI-LABEL: clobber_base:
 ; X32ABI:       # %bb.0: # %entry
+; X32ABI-NEXT:    leaq {{[0-9]+}}(%esp), %r10
+; X32ABI-NEXT:    .cfi_def_cfa %r10, 0
+; X32ABI-NEXT:    andl $-128, %esp
+; X32ABI-NEXT:    pushq -16(%r10)
 ; X32ABI-NEXT:    pushq %rbp
-; X32ABI-NEXT:    .cfi_def_cfa_offset 16
-; X32ABI-NEXT:    .cfi_offset %rbp, -16
 ; X32ABI-NEXT:    movl %esp, %ebp
-; X32ABI-NEXT:    .cfi_def_cfa_register %rbp
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 #
 ; X32ABI-NEXT:    pushq %rbx
-; X32ABI-NEXT:    andl $-128, %esp
-; X32ABI-NEXT:    subl $128, %esp
-; X32ABI-NEXT:    movl %esp, %ebx
-; X32ABI-NEXT:    .cfi_offset %rbx, -24
+; X32ABI-NEXT:    subl $232, %esp
+; X32ABI-NEXT:    movq %r10, {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Spill
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 #
+; X32ABI-NEXT:    .cfi_escape 0x0f, 0x04, 0x76, 0x88, 0x7f, 0x06 #
 ; X32ABI-NEXT:    callq helper@PLT
 ; X32ABI-NEXT:    # kill: def $eax killed $eax def $rax
 ; X32ABI-NEXT:    leal 31(,%rax,4), %eax
@@ -138,12 +144,14 @@
 ; X32ABI-NEXT:    #NO_APP
 ; X32ABI-NEXT:    movl $8, %edx
 ; X32ABI-NEXT:    #APP
-; X32ABI-NEXT:    movl %edx, (%ebx)
+; X32ABI-NEXT:    movl %edx, -112(%ebp)
 ; X32ABI-NEXT:    #NO_APP
 ; X32ABI-NEXT:    movl $0, (%ecx,%eax)
+; X32ABI-NEXT:    movq {{[-0-9]+}}(%e{{[sb]}}p), %r10 # 8-byte Reload
 ; X32ABI-NEXT:    leal -8(%ebp), %esp
 ; X32ABI-NEXT:    popq %rbx
 ; X32ABI-NEXT:    popq %rbp
+; X32ABI-NEXT:    leaq -16(%r10), %rsp
 ; X32ABI-NEXT:    .cfi_def_cfa %rsp, 8
 ; X32ABI-NEXT:    retq
 entry:
@@ -160,14 +168,15 @@
 define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 %param3, i32 %param4, i32 %param5, i32 %param6, i32 %param7, i32 %param8, i32 %param9, i32 %param10, i32 %param11, i32 %param12) #0 {
 ; CHECK-LABEL: clobber_baseptr_argptr:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %r10
+; CHECK-NEXT:    .cfi_def_cfa %r10, 0
+; CHECK-NEXT:    andq $-128, %rsp
+; CHECK-NEXT:    pushq -16(%r10)
 ; CHECK-NEXT:    pushq %rbp
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset %rbp, -16
 ; CHECK-NEXT:    movq %rsp, %rbp
-; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 #
 ; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    andq $-128, %rsp
-; CHECK-NEXT:    subq $256, %rsp # imm = 0x100
+; CHECK-NEXT:    subq $360, %rsp # imm = 0x168
 ; CHECK-NEXT:    movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-NEXT:    movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-NEXT:    movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -176,17 +185,18 @@
 ; CHECK-NEXT:    movaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-NEXT:    movaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-NEXT:    movaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    movq %rsp, %rbx
-; CHECK-NEXT:    .cfi_offset %rbx, -24
-; CHECK-NEXT:    .cfi_offset %xmm8, -160
-; CHECK-NEXT:    .cfi_offset %xmm9, -144
-; CHECK-NEXT:    .cfi_offset %xmm10, -128
-; CHECK-NEXT:    .cfi_offset %xmm11, -112
-; CHECK-NEXT:    .cfi_offset %xmm12, -96
-; CHECK-NEXT:    .cfi_offset %xmm13, -80
-; CHECK-NEXT:    .cfi_offset %xmm14, -64
-; CHECK-NEXT:    .cfi_offset %xmm15, -48
-; CHECK-NEXT:    movl 16(%rbp), %r14d
+; CHECK-NEXT:    movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 #
+; CHECK-NEXT:    .cfi_escape 0x10, 0x19, 0x02, 0x76, 0xf0, 0x7e #
+; CHECK-NEXT:    .cfi_escape 0x10, 0x1a, 0x02, 0x76, 0x80, 0x7f #
+; CHECK-NEXT:    .cfi_escape 0x10, 0x1b, 0x02, 0x76, 0x90, 0x7f #
+; CHECK-NEXT:    .cfi_escape 0x10, 0x1c, 0x02, 0x76, 0xa0, 0x7f #
+; CHECK-NEXT:    .cfi_escape 0x10, 0x1d, 0x02, 0x76, 0xb0, 0x7f #
+; CHECK-NEXT:    .cfi_escape 0x10, 0x1e, 0x02, 0x76, 0x40 #
+; CHECK-NEXT:    .cfi_escape 0x10, 0x1f, 0x02, 0x76, 0x50 #
+; CHECK-NEXT:    .cfi_escape 0x10, 0x20, 0x02, 0x76, 0x60 #
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x04, 0x76, 0x88, 0x7e, 0x06 #
+; CHECK-NEXT:    movl (%r10), %r14d
 ; CHECK-NEXT:    callq helper@PLT
 ; CHECK-NEXT:    movq %rsp, %rcx
 ; CHECK-NEXT:    movl %eax, %eax
@@ -205,7 +215,7 @@
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    movl $8, %edx
 ; CHECK-NEXT:    #APP
-; CHECK-NEXT:    movl %edx, (%rbx)
+; CHECK-NEXT:    movl %edx, -240(%rbp)
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    movl %r14d, (%rcx,%rax)
 ; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
@@ -216,22 +226,25 @@
 ; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
 ; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
 ; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
 ; CHECK-NEXT:    leaq -8(%rbp), %rsp
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    leaq -16(%r10), %rsp
 ; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
 ; CHECK-NEXT:    retq
 ;
 ; X32ABI-LABEL: clobber_baseptr_argptr:
 ; X32ABI:       # %bb.0: # %entry
+; X32ABI-NEXT:    leaq {{[0-9]+}}(%esp), %r10
+; X32ABI-NEXT:    .cfi_def_cfa %r10, 0
+; X32ABI-NEXT:    andl $-128, %esp
+; X32ABI-NEXT:    pushq -16(%r10)
 ; X32ABI-NEXT:    pushq %rbp
-; X32ABI-NEXT:    .cfi_def_cfa_offset 16
-; X32ABI-NEXT:    .cfi_offset %rbp, -16
 ; X32ABI-NEXT:    movl %esp, %ebp
-; X32ABI-NEXT:    .cfi_def_cfa_register %rbp
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 #
 ; X32ABI-NEXT:    pushq %rbx
-; X32ABI-NEXT:    andl $-128, %esp
-; X32ABI-NEXT:    subl $256, %esp # imm = 0x100
+; X32ABI-NEXT:    subl $360, %esp # imm = 0x168
 ; X32ABI-NEXT:    movaps %xmm15, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; X32ABI-NEXT:    movaps %xmm14, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; X32ABI-NEXT:    movaps %xmm13, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
@@ -240,17 +253,18 @@
 ; X32ABI-NEXT:    movaps %xmm10, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; X32ABI-NEXT:    movaps %xmm9, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; X32ABI-NEXT:    movaps %xmm8, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; X32ABI-NEXT:    movl %esp, %ebx
-; X32ABI-NEXT:    .cfi_offset %rbx, -24
-; X32ABI-NEXT:    .cfi_offset %xmm8, -160
-; X32ABI-NEXT:    .cfi_offset %xmm9, -144
-; X32ABI-NEXT:    .cfi_offset %xmm10, -128
-; X32ABI-NEXT:    .cfi_offset %xmm11, -112
-; X32ABI-NEXT:    .cfi_offset %xmm12, -96
-; X32ABI-NEXT:    .cfi_offset %xmm13, -80
-; X32ABI-NEXT:    .cfi_offset %xmm14, -64
-; X32ABI-NEXT:    .cfi_offset %xmm15, -48
-; X32ABI-NEXT:    movl 16(%ebp), %r14d
+; X32ABI-NEXT:    movq %r10, {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Spill
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 #
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x19, 0x02, 0x76, 0xf0, 0x7e #
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x1a, 0x02, 0x76, 0x80, 0x7f #
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x1b, 0x02, 0x76, 0x90, 0x7f #
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x1c, 0x02, 0x76, 0xa0, 0x7f #
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x1d, 0x02, 0x76, 0xb0, 0x7f #
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x1e, 0x02, 0x76, 0x40 #
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x1f, 0x02, 0x76, 0x50 #
+; X32ABI-NEXT:    .cfi_escape 0x10, 0x20, 0x02, 0x76, 0x60 #
+; X32ABI-NEXT:    .cfi_escape 0x0f, 0x04, 0x76, 0x88, 0x7e, 0x06 #
+; X32ABI-NEXT:    movl (%r10), %r14d
 ; X32ABI-NEXT:    callq helper@PLT
 ; X32ABI-NEXT:    # kill: def $eax killed $eax def $rax
 ; X32ABI-NEXT:    leal 31(,%rax,4), %eax
@@ -269,7 +283,7 @@
 ; X32ABI-NEXT:    #NO_APP
 ; X32ABI-NEXT:    movl $8, %edx
 ; X32ABI-NEXT:    #APP
-; X32ABI-NEXT:    movl %edx, (%ebx)
+; X32ABI-NEXT:    movl %edx, -240(%ebp)
 ; X32ABI-NEXT:    #NO_APP
 ; X32ABI-NEXT:    movl %r14d, (%ecx,%eax)
 ; X32ABI-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm8 # 16-byte Reload
@@ -280,9 +294,11 @@
 ; X32ABI-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm13 # 16-byte Reload
 ; X32ABI-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm14 # 16-byte Reload
 ; X32ABI-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm15 # 16-byte Reload
+; X32ABI-NEXT:    movq {{[-0-9]+}}(%e{{[sb]}}p), %r10 # 8-byte Reload
 ; X32ABI-NEXT:    leal -8(%ebp), %esp
 ; X32ABI-NEXT:    popq %rbx
 ; X32ABI-NEXT:    popq %rbp
+; X32ABI-NEXT:    leaq -16(%r10), %rsp
 ; X32ABI-NEXT:    .cfi_def_cfa %rsp, 8
 ; X32ABI-NEXT:    retq
 entry:
@@ -298,6 +314,6 @@
   ret void
 }
 
-attributes #0 = { "frame-pointer"="all"}
+attributes #0 = {"frame-pointer"="all"}
 !llvm.module.flags = !{!0}
 !0 = !{i32 2, !"override-stack-alignment", i32 32}
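
Postscript for reviewers decoding the new .cfi_escape directives (our
annotation from the DWARF spec, not part of the patch); taking the two
escapes in the i386 clobber_base output above:

    .cfi_escape 0x10, 0x05, 0x02, 0x75, 0x00
    # 0x10 = DW_CFA_expression, ULEB128 register 5 (%ebp on i386),
    # 2-byte expression: DW_OP_breg5 (0x75), SLEB128 offset 0
    # => the caller's %ebp is saved at [%ebp + 0]

    .cfi_escape 0x0f, 0x04, 0x75, 0x84, 0x7f, 0x06
    # 0x0f = DW_CFA_def_cfa_expression, 4-byte expression:
    # DW_OP_breg5 (0x75), SLEB128 -124 (0x84 0x7f), DW_OP_deref (0x06)
    # => CFA = *(%ebp - 124), i.e. reloaded from the spilled argument
    #    base pointer

This matches the comments in the X86FrameLowering changes
("DW_CFA_expression: reg5 DW_OP_breg5 +0" and
"DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref").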