diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4738,6 +4738,12 @@
     return InlineAsm::Constraint_Unknown;
   }
 
+  /// Give the target a chance to take fix-up action when inline assembly
+  /// defines or clobbers the physical register \p Reg. The default
+  /// implementation does nothing.
+  virtual void lowerInlineAsmRegClobber(MachineFunction &MF,
+                                        Register Reg) const {}
+
   /// Try to replace an X constraint, which matches anything, with another that
   /// has more specific requirements based on the type of the corresponding
   /// operand. This returns null if there is no replacement to make.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9043,6 +9043,14 @@
     }
     return false;
   };
+  auto DetectBasePtrRegister = [&]() {
+    MachineFunction &MF = DAG.getMachineFunction();
+    for (unsigned Reg : OpInfo.AssignedRegs.Regs) {
+      if (Register::isPhysicalRegister(Reg)) {
+        TLI.lowerInlineAsmRegClobber(MF, Reg);
+      }
+    }
+  };
   assert((OpInfo.ConstraintType != TargetLowering::C_Address ||
           (OpInfo.Type == InlineAsm::isInput &&
            !OpInfo.isMatchingInputConstraint())) &&
@@ -9075,6 +9083,7 @@
 
       if (DetectWriteToReservedRegister())
         return;
+      DetectBasePtrRegister();
 
       // Add information to the INLINEASM node to know that this register is
      // set.
@@ -9257,6 +9266,7 @@
 
       if (DetectWriteToReservedRegister())
        return;
+      DetectBasePtrRegister();
 
      SDLoc dl = getCurSDLoc();
 
@@ -9270,10 +9280,12 @@
     case InlineAsm::isClobber:
       // Add the clobbered value to the operand list, so that the register
       // allocator is aware that the physreg got clobbered.
-      if (!OpInfo.AssignedRegs.Regs.empty())
+      if (!OpInfo.AssignedRegs.Regs.empty()) {
         OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
                                                  false, 0, getCurSDLoc(), DAG,
                                                  AsmNodeOperands);
+        DetectBasePtrRegister();
+      }
       break;
     }
   }
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -265,6 +265,10 @@
   void emitCatchRetReturnValue(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                MachineInstr *CatchRet) const;
+
+  /// Save and restore the base pointer register around each inline-asm
+  /// instruction that clobbers it.
+  void saveRestoreBasePointerReg(MachineFunction &MF) const;
 };
 
 } // End llvm namespace
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -3887,6 +3887,7 @@
 
 void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
     MachineFunction &MF, RegScavenger *RS) const {
+  saveRestoreBasePointerReg(MF);
   if (STI.is32Bit() && MF.hasEHFunclets())
     restoreWinEHStackPointersInParent(MF);
 }
@@ -3905,3 +3906,38 @@
                                   /*RestoreSP=*/IsSEH);
   }
 }
+
+void X86FrameLowering::saveRestoreBasePointerReg(MachineFunction &MF) const {
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  if (!TRI->hasBasePointer(MF))
+    return;
+
+  Register BasePtr = TRI->getBaseRegister();
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineBasicBlock::iterator MII = MBB.begin(); MII != MBB.end();
+         ++MII) {
+      if (!MII->isInlineAsm())
+        continue;
+      for (const MachineOperand &MO : MII->operands()) {
+        if (!MO.isReg())
+          continue;
+        Register Reg = MO.getReg();
+        if (!Register::isPhysicalRegister(Reg))
+          continue;
+        if (!TRI->isSuperOrSubRegisterEq(BasePtr, Reg))
+          continue;
+
+        // Reload the base pointer from its frame slot right after the inline
+        // asm, and stop scanning this instruction's operands.
+        assert(X86FI->getRestoreBasePointer() &&
+               "expected the prologue to stash the base pointer");
+        DebugLoc DL;
+        Register FramePtr = TRI->getFrameRegister(MF);
+        unsigned Opc = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm;
+        addRegOffset(BuildMI(MBB, std::next(MII), DL, TII.get(Opc), BasePtr),
+                     FramePtr, false, X86FI->getRestoreBasePointerOffset());
+        break;
+      }
+    }
+  }
+}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1274,6 +1274,9 @@
       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
     }
 
+    void lowerInlineAsmRegClobber(MachineFunction &MF,
+                                  Register Reg) const override;
+
     /// Handle Lowering flag assembly outputs.
     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                         const SDLoc &DL,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58361,6 +58361,20 @@
   return Res;
 }
 
+void X86TargetLowering::lowerInlineAsmRegClobber(MachineFunction &MF,
+                                                 Register Reg) const {
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+  if (!RegInfo->hasBasePointer(MF))
+    return;
+  Register BasePtr = RegInfo->getBaseRegister();
+  if (!RegInfo->isSuperOrSubRegisterEq(BasePtr, Reg))
+    return;
+  // Mark the function so that the prologue stashes the base pointer in a
+  // frame slot; saveRestoreBasePointerReg reloads it after the inline asm.
+  X86FI->setRestoreBasePointer(&MF);
+}
+
 bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
   // Integer division on x86 is expensive. However, when aggressively optimizing
   // for code size, we prefer to use a div instruction, as it is usually smaller
diff --git a/llvm/test/CodeGen/X86/i386-baseptr.ll b/llvm/test/CodeGen/X86/i386-baseptr.ll
--- a/llvm/test/CodeGen/X86/i386-baseptr.ll
+++ b/llvm/test/CodeGen/X86/i386-baseptr.ll
@@ -37,9 +37,11 @@
 ; CHECK-NEXT:    pushl %ebp
 ; CHECK-NEXT:    movl %esp, %ebp
 ; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    pushl %esi
 ; CHECK-NEXT:    andl $-128, %esp
 ; CHECK-NEXT:    subl $128, %esp
 ; CHECK-NEXT:    movl %esp, %esi
+; CHECK-NEXT:    movl %esp, -8(%ebp)
 ; CHECK-NEXT:    calll helper@PLT
 ; CHECK-NEXT:    movl %esp, %ecx
 ; CHECK-NEXT:    leal 31(,%eax,4), %eax
@@ -52,12 +54,14 @@
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movl -8(%ebp), %esi
 ; CHECK-NEXT:    movl $8, %edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    movl %edx, (%esi)
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    movl $0, (%ecx,%eax)
-; CHECK-NEXT:    leal -4(%ebp), %esp
+; CHECK-NEXT:    leal -8(%ebp), %esp
+; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/swifttail-realign.ll b/llvm/test/CodeGen/X86/swifttail-realign.ll
--- a/llvm/test/CodeGen/X86/swifttail-realign.ll
+++ b/llvm/test/CodeGen/X86/swifttail-realign.ll
@@ -16,7 +16,7 @@
 ; CHECK: movq [[RETADDR]], 8(%rbp)
 ; CHECK: movq $42, 16(%rbp)
 ; CHECK: movq $0, 24(%rbp)
-; CHECK: leaq -8(%rbp), %rsp
+; CHECK: leaq -16(%rbp), %rsp
 ; CHECK: popq %rbx
 ; CHECK: popq %rbp
 ; CHECK: jmp callee
diff --git a/llvm/test/CodeGen/X86/x86-64-baseptr.ll b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
--- a/llvm/test/CodeGen/X86/x86-64-baseptr.ll
+++ b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
@@ -70,9 +70,11 @@
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    movq %rsp, %rbp
 ; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    andq $-128, %rsp
 ; CHECK-NEXT:    subq $128, %rsp
 ; CHECK-NEXT:    movq %rsp, %rbx
+; CHECK-NEXT:    movq %rsp, -16(%rbp)
 ; CHECK-NEXT:    callq helper@PLT
 ; CHECK-NEXT:    movq %rsp, %rcx
 ; CHECK-NEXT:    movl %eax, %eax
@@ -86,12 +88,14 @@
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movq -16(%rbp), %rbx
 ; CHECK-NEXT:    movl $8, %edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    movl %edx, (%rbx)
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    movl $0, (%rcx,%rax)
-; CHECK-NEXT:    leaq -8(%rbp), %rsp
+; CHECK-NEXT:    leaq -16(%rbp), %rsp
+; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    retq
@@ -101,9 +105,11 @@
 ; X32ABI-NEXT:    pushq %rbp
 ; X32ABI-NEXT:    movl %esp, %ebp
 ; X32ABI-NEXT:    pushq %rbx
+; X32ABI-NEXT:    pushq %rbx
 ; X32ABI-NEXT:    andl $-128, %esp
 ; X32ABI-NEXT:    subl $128, %esp
 ; X32ABI-NEXT:    movl %esp, %ebx
+; X32ABI-NEXT:    movl %esp, -16(%ebp)
 ; X32ABI-NEXT:    callq helper@PLT
 ; X32ABI-NEXT:    # kill: def $eax killed $eax def $rax
 ; X32ABI-NEXT:    leal 31(,%rax,4), %eax
@@ -117,12 +123,14 @@
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    nop
 ; X32ABI-NEXT:    #NO_APP
+; X32ABI-NEXT:    movl -16(%ebp), %ebx
 ; X32ABI-NEXT:    movl $8, %edx
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    movl %edx, (%ebx)
 ; X32ABI-NEXT:    #NO_APP
 ; X32ABI-NEXT:    movl $0, (%ecx,%eax)
-; X32ABI-NEXT:    leal -8(%ebp), %esp
+; X32ABI-NEXT:    leal -16(%ebp), %esp
+; X32ABI-NEXT:    popq %rbx
 ; X32ABI-NEXT:    popq %rbx
 ; X32ABI-NEXT:    popq %rbp
 ; X32ABI-NEXT:    retq
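
Note (not part of the patch): the scenario the updated tests encode in IR can be sketched from C. This is a minimal, hypothetical reproducer under the assumption that needing both stack realignment and a variable-sized stack object is what makes hasBasePointer() return true; the function and helper names are made up for illustration.

    /* Hypothetical reproducer, compiled with clang -O2 for x86-64. The
     * over-aligned local forces stack realignment and the VLA is a
     * variable-sized object, so locals are addressed through the base
     * pointer (%rbx on x86-64); the asm statement then clobbers it.
     * Before this patch, the store through vla used the clobbered %rbx. */
    void escape(int *p); /* hypothetical helper to keep the locals alive */

    void clobber_baseptr(int n) {
      int aligned[8] __attribute__((aligned(128))); /* forces realignment */
      int vla[n];                                   /* variable-sized object */
      __asm__ volatile("nop" : : : "rbx");          /* clobbers base pointer */
      vla[0] = 8; /* address computed from %rbx after the asm */
      escape(aligned);
      escape(vla);
    }

With the patch applied, lowerInlineAsmRegClobber marks the function via setRestoreBasePointer(), the prologue stashes the base pointer's value in a frame slot, and saveRestoreBasePointerReg() reloads it immediately after each inline asm that clobbers it, which is what the new `movq -16(%rbp), %rbx` and `movl -8(%ebp), %esi` CHECK lines verify.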