diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4731,6 +4731,12 @@
     return InlineAsm::Constraint_Unknown;
   }
 
+  /// Give the target a chance to react to a physical register \p Reg that an
+  /// inline asm statement defines or clobbers, e.g. by arranging for the
+  /// register to be saved and restored around the asm.
+  virtual void lowerInlineAsmRegClobber(MachineFunction &MF,
+                                        Register Reg) const {}
+
   /// Try to replace an X constraint, which matches anything, with another that
   /// has more specific requirements based on the type of the corresponding
   /// operand. This returns null if there is no replacement to make.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9045,6 +9045,14 @@
     }
     return false;
   };
+  // Report each physical register that the asm defines or clobbers to the
+  // target so it can plan a save/restore, e.g. for the x86 base pointer.
+  auto DetectBasePtrRegister = [&]() {
+    MachineFunction &MF = DAG.getMachineFunction();
+    for (unsigned Reg : OpInfo.AssignedRegs.Regs)
+      if (Register::isPhysicalRegister(Reg))
+        TLI.lowerInlineAsmRegClobber(MF, Reg);
+  };
 
   assert((OpInfo.ConstraintType != TargetLowering::C_Address ||
           (OpInfo.Type == InlineAsm::isInput &&
           !OpInfo.isMatchingInputConstraint())) &&
@@ -9077,6 +9085,7 @@
 
       if (DetectWriteToReservedRegister())
         return;
+      DetectBasePtrRegister();
 
       // Add information to the INLINEASM node to know that this register is
       // set.
@@ -9259,6 +9268,7 @@
 
       if (DetectWriteToReservedRegister())
         return;
+      DetectBasePtrRegister();
 
       SDLoc dl = getCurSDLoc();
 
@@ -9272,10 +9282,12 @@
     case InlineAsm::isClobber:
       // Add the clobbered value to the operand list, so that the register
       // allocator is aware that the physreg got clobbered.
-      if (!OpInfo.AssignedRegs.Regs.empty())
+      if (!OpInfo.AssignedRegs.Regs.empty()) {
         OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
                                                  false, 0, getCurSDLoc(), DAG,
                                                  AsmNodeOperands);
+        DetectBasePtrRegister();
+      }
       break;
     }
   }
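The default implementation of the new hook is an intentional no-op:
SelectionDAGBuilder only reports the physical registers an inline asm defines
or clobbers, and each target decides whether any of them needs special
handling. As a hedged sketch of what an override could look like on some other
target (the "Foo" names below are hypothetical and assume a
MachineFunctionInfo flag analogous to X86's RestoreBasePointer; the real X86
override appears further down in this patch):

    // Sketch only; the "Foo" names are illustrative, not part of this patch.
    void FooTargetLowering::lowerInlineAsmRegClobber(MachineFunction &MF,
                                                     Register Reg) const {
      const FooRegisterInfo *TRI = Subtarget.getRegisterInfo();
      if (!TRI->hasBasePointer(MF))
        return;
      // Ask frame lowering to spill the base pointer in the prologue so it
      // can be reloaded after any inline asm that clobbers it.
      if (TRI->isSuperOrSubRegisterEq(TRI->getBaseRegister(), Reg))
        MF.getInfo<FooMachineFunctionInfo>()->setRestoreBasePointer(&MF);
    }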
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -265,6 +265,10 @@
   void emitCatchRetReturnValue(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                MachineInstr *CatchRet) const;
+
+  /// Restore the base pointer register after each inline asm instruction
+  /// that clobbers it.
+  void saveRestoreBasePointerReg(MachineFunction &MF) const;
 };
 
 } // End llvm namespace
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -3862,6 +3862,7 @@
 
 void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
     MachineFunction &MF, RegScavenger *RS) const {
+  saveRestoreBasePointerReg(MF);
  if (STI.is32Bit() && MF.hasEHFunclets())
     restoreWinEHStackPointersInParent(MF);
 }
@@ -3880,3 +3881,38 @@
     /*RestoreSP=*/IsSEH);
   }
 }
+
+void X86FrameLowering::saveRestoreBasePointerReg(MachineFunction &MF) const {
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  if (!TRI->hasBasePointer(MF))
+    return;
+
+  Register BasePtr = TRI->getBaseRegister();
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineBasicBlock::iterator MII = MBB.begin(); MII != MBB.end();
+         ++MII) {
+      if (!MII->isInlineAsm())
+        continue;
+      for (const MachineOperand &MO : MII->operands()) {
+        if (!MO.isReg())
+          continue;
+        Register Reg = MO.getReg();
+        if (!Register::isPhysicalRegister(Reg))
+          continue;
+        if (!TRI->isSuperOrSubRegisterEq(BasePtr, Reg))
+          continue;
+
+        // The asm clobbers the base pointer: reload it from the slot the
+        // prologue spilled it to. Insert after the asm via std::next(MII) so
+        // the outer iteration is unaffected, and emit at most one reload.
+        DebugLoc DL;
+        assert(X86FI->getRestoreBasePointer());
+        Register FramePtr = TRI->getFrameRegister(MF);
+        unsigned Opc = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm;
+        addRegOffset(BuildMI(MBB, std::next(MII), DL, TII.get(Opc), BasePtr),
+                     FramePtr, false, X86FI->getRestoreBasePointerOffset());
+        break;
+      }
+    }
+  }
+}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1272,6 +1272,9 @@
       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
     }
 
+    void lowerInlineAsmRegClobber(MachineFunction &MF,
+                                  Register Reg) const override;
+
     /// Handle Lowering flag assembly outputs.
     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                         const SDLoc &DL,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58244,6 +58244,20 @@
   return Res;
 }
 
+void X86TargetLowering::lowerInlineAsmRegClobber(MachineFunction &MF,
+                                                 Register Reg) const {
+  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+  if (!TRI->hasBasePointer(MF))
+    return;
+
+  // If the asm clobbers (a sub- or super-register of) the base pointer, have
+  // the prologue spill it so frame lowering can reload it after the asm.
+  Register BasePtr = TRI->getBaseRegister();
+  if (!TRI->isSuperOrSubRegisterEq(BasePtr, Reg))
+    return;
+  MF.getInfo<X86MachineFunctionInfo>()->setRestoreBasePointer(&MF);
+}
+
 bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
   // Integer division on x86 is expensive. However, when aggressively optimizing
   // for code size, we prefer to use a div instruction, as it is usually smaller
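For context, the miscompile this patch addresses is reachable from plain GNU
C/C++: a function gets a base pointer when its stack is both realigned and
variable-sized, and inline asm may then list that register as a clobber. A
minimal sketch for i386, where the base pointer is %esi (on x86-64 the clobber
would be "rbx"; helper() is a stand-in for any extern function):

    #include <alloca.h>

    extern "C" int helper();

    void base() {
      // An over-aligned local forces stack realignment, and alloca() makes
      // the frame size dynamic; together they force x86 to use a base pointer.
      alignas(128) char pad[128];
      char *buf = static_cast<char *>(alloca(helper()));
      // Clobbers the base pointer; previously, later uses of buf would go
      // through a trashed %esi.
      asm volatile("nop" ::: "esi");
      buf[0] = 8; // addressed relative to the (now reloaded) base pointer
      (void)pad;
    }

With the patch, lowerInlineAsmRegClobber() marks the function, the prologue
stores the base pointer value into a fixed frame slot, and
saveRestoreBasePointerReg() reloads %esi from that slot right after the asm.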
diff --git a/llvm/test/CodeGen/X86/i386-baseptr.ll b/llvm/test/CodeGen/X86/i386-baseptr.ll
--- a/llvm/test/CodeGen/X86/i386-baseptr.ll
+++ b/llvm/test/CodeGen/X86/i386-baseptr.ll
@@ -40,6 +40,7 @@
 ; CHECK-NEXT:    andl $-128, %esp
 ; CHECK-NEXT:    subl $128, %esp
 ; CHECK-NEXT:    movl %esp, %esi
+; CHECK-NEXT:    movl %esp, -16(%ebp)
 ; CHECK-NEXT:    calll helper@PLT
 ; CHECK-NEXT:    movl %esp, %ecx
 ; CHECK-NEXT:    leal 31(,%eax,4), %eax
@@ -52,6 +53,7 @@
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movl -16(%ebp), %esi
 ; CHECK-NEXT:    movl $8, %edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    movl %edx, (%esi)
diff --git a/llvm/test/CodeGen/X86/x86-64-baseptr.ll b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
--- a/llvm/test/CodeGen/X86/x86-64-baseptr.ll
+++ b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
@@ -73,6 +73,7 @@
 ; CHECK-NEXT:    andq $-128, %rsp
 ; CHECK-NEXT:    subq $128, %rsp
 ; CHECK-NEXT:    movq %rsp, %rbx
+; CHECK-NEXT:    movq %rsp, -48(%rbp)
 ; CHECK-NEXT:    callq helper@PLT
 ; CHECK-NEXT:    movq %rsp, %rcx
 ; CHECK-NEXT:    movl %eax, %eax
@@ -86,6 +87,7 @@
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movq -48(%rbp), %rbx
 ; CHECK-NEXT:    movl $8, %edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    movl %edx, (%rbx)
@@ -104,6 +106,7 @@
 ; X32ABI-NEXT:    andl $-128, %esp
 ; X32ABI-NEXT:    subl $128, %esp
 ; X32ABI-NEXT:    movl %esp, %ebx
+; X32ABI-NEXT:    movl %esp, -48(%ebp)
 ; X32ABI-NEXT:    callq helper@PLT
 ; X32ABI-NEXT:    # kill: def $eax killed $eax def $rax
 ; X32ABI-NEXT:    leal 31(,%rax,4), %eax
@@ -117,6 +120,7 @@
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    nop
 ; X32ABI-NEXT:    #NO_APP
+; X32ABI-NEXT:    movl -48(%ebp), %ebx
 ; X32ABI-NEXT:    movl $8, %edx
 ; X32ABI-NEXT:    #APP
 ; X32ABI-NEXT:    movl %edx, (%ebx)
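In the test updates, the added store (movl %esp, -16(%ebp), and the
movq/movl variants at -48) is the prologue spill of the freshly established
base pointer, and the added load right after the first #APP/#NO_APP block is
the reload inserted by saveRestoreBasePointerReg() before the next asm
dereferences the register. The CHECK lines are in autogenerated style, so they
are best refreshed with llvm/utils/update_llc_test_checks.py rather than
edited by hand.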