Skip to content

Commit 91e11a8

Browse files
committed Feb 13, 2018
[X86] Use EDI for retpoline when no scratch regs are left
Summary: Instead of solving the hard problem of how to pass the callee to the indirect jump thunk without a register, just use a callee-saved register (CSR). At a call boundary, there's nothing stopping us from using a CSR to hold the callee as long as we save and restore it in the prologue.

Also, add tests for this mregparm=3 case. I wrote execution tests for __llvm_retpoline_push, but they never got committed as lit tests, either because I never rewrote them or because they got lost in merge conflicts.

Reviewers: chandlerc, dwmw2
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D43214
llvm-svn: 325049
1 parent cb8ac00 commit 91e11a8

File tree

4 files changed

+76
-72
lines changed

4 files changed

+76
-72
lines changed
 

‎llvm/lib/Target/X86/X86ISelLowering.cpp

+18-32
Original file line number | Diff line number | Diff line change
@@ -27081,9 +27081,6 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
2708127081
// attempt to help out kernels and other systems where duplicating the
2708227082
// thunks is costly.
2708327083
switch (Reg) {
27084-
case 0:
27085-
assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
27086-
return "__x86_indirect_thunk";
2708727084
case X86::EAX:
2708827085
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
2708927086
return "__x86_indirect_thunk_eax";
@@ -27093,6 +27090,9 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
2709327090
case X86::EDX:
2709427091
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
2709527092
return "__x86_indirect_thunk_edx";
27093+
case X86::EDI:
27094+
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
27095+
return "__x86_indirect_thunk_edi";
2709627096
case X86::R11:
2709727097
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
2709827098
return "__x86_indirect_thunk_r11";
@@ -27102,9 +27102,6 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
2710227102

2710327103
// When targeting an internal COMDAT thunk use an LLVM-specific name.
2710427104
switch (Reg) {
27105-
case 0:
27106-
assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
27107-
return "__llvm_retpoline_push";
2710827105
case X86::EAX:
2710927106
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
2711027107
return "__llvm_retpoline_eax";
@@ -27114,6 +27111,9 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
2711427111
case X86::EDX:
2711527112
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
2711627113
return "__llvm_retpoline_edx";
27114+
case X86::EDI:
27115+
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
27116+
return "__llvm_retpoline_edi";
2711727117
case X86::R11:
2711827118
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
2711927119
return "__llvm_retpoline_r11";
@@ -27135,15 +27135,13 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
2713527135
// just use R11, but we scan for uses anyway to ensure we don't generate
2713627136
// incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't
2713727137
// already a register use operand to the call to hold the callee. If none
27138-
// are available, push the callee instead. This is less efficient, but is
27139-
// necessary for functions using 3 regparms. Such function calls are
27140-
// (currently) not eligible for tail call optimization, because there is no
27141-
// scratch register available to hold the address of the callee.
27138+
// are available, use EDI instead. EDI is chosen because EBX is the PIC base
27139+
// register and ESI is the base pointer to realigned stack frames with VLAs.
2714227140
SmallVector<unsigned, 3> AvailableRegs;
2714327141
if (Subtarget.is64Bit())
2714427142
AvailableRegs.push_back(X86::R11);
2714527143
else
27146-
AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX});
27144+
AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI});
2714727145

2714827146
// Zero out any registers that are already used.
2714927147
for (const auto &MO : MI.operands()) {
@@ -27161,30 +27159,18 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
2716127159
break;
2716227160
}
2716327161
}
27162+
if (!AvailableReg)
27163+
report_fatal_error("calling convention incompatible with retpoline, no "
27164+
"available registers");
2716427165

2716527166
const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
2716627167

27167-
if (AvailableReg == 0) {
27168-
// No register available. Use PUSH. This must not be a tailcall, and this
27169-
// must not be x64.
27170-
if (Subtarget.is64Bit())
27171-
report_fatal_error(
27172-
"Cannot make an indirect call on x86-64 using both retpoline and a "
27173-
"calling convention that preservers r11");
27174-
if (Opc != X86::CALLpcrel32)
27175-
report_fatal_error("Cannot make an indirect tail call on x86 using "
27176-
"retpoline without a preserved register");
27177-
BuildMI(*BB, MI, DL, TII->get(X86::PUSH32r)).addReg(CalleeVReg);
27178-
MI.getOperand(0).ChangeToES(Symbol);
27179-
MI.setDesc(TII->get(Opc));
27180-
} else {
27181-
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
27182-
.addReg(CalleeVReg);
27183-
MI.getOperand(0).ChangeToES(Symbol);
27184-
MI.setDesc(TII->get(Opc));
27185-
MachineInstrBuilder(*BB->getParent(), &MI)
27186-
.addReg(AvailableReg, RegState::Implicit | RegState::Kill);
27187-
}
27168+
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
27169+
.addReg(CalleeVReg);
27170+
MI.getOperand(0).ChangeToES(Symbol);
27171+
MI.setDesc(TII->get(Opc));
27172+
MachineInstrBuilder(*BB->getParent(), &MI)
27173+
.addReg(AvailableReg, RegState::Implicit | RegState::Kill);
2718827174
return BB;
2718927175
}
2719027176

‎llvm/lib/Target/X86/X86RetpolineThunks.cpp

+11-31
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ static const char R11ThunkName[] = "__llvm_retpoline_r11";
4343
static const char EAXThunkName[] = "__llvm_retpoline_eax";
4444
static const char ECXThunkName[] = "__llvm_retpoline_ecx";
4545
static const char EDXThunkName[] = "__llvm_retpoline_edx";
46-
static const char PushThunkName[] = "__llvm_retpoline_push";
46+
static const char EDIThunkName[] = "__llvm_retpoline_edi";
4747

4848
namespace {
4949
class X86RetpolineThunks : public MachineFunctionPass {
@@ -127,7 +127,7 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
127127
createThunkFunction(M, R11ThunkName);
128128
else
129129
for (StringRef Name :
130-
{EAXThunkName, ECXThunkName, EDXThunkName, PushThunkName})
130+
{EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
131131
createThunkFunction(M, Name);
132132
InsertedThunks = true;
133133
return true;
@@ -151,9 +151,8 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
151151
populateThunk(MF, X86::R11);
152152
} else {
153153
// For 32-bit targets we need to emit a collection of thunks for various
154-
// possible scratch registers as well as a fallback that is used when
155-
// there are no scratch registers and assumes the retpoline target has
156-
// been pushed.
154+
// possible scratch registers as well as a fallback that uses EDI, which is
155+
// normally callee saved.
157156
// __llvm_retpoline_eax:
158157
// calll .Leax_call_target
159158
// .Leax_capture_spec:
@@ -174,32 +173,18 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
174173
// movl %edx, (%esp)
175174
// retl
176175
//
177-
// This last one is a bit more special and so needs a little extra
178-
// handling.
179-
// __llvm_retpoline_push:
180-
// calll .Lpush_call_target
181-
// .Lpush_capture_spec:
182-
// pause
183-
// lfence
184-
// jmp .Lpush_capture_spec
185-
// .align 16
186-
// .Lpush_call_target:
187-
// # Clear pause_loop return address.
188-
// addl $4, %esp
189-
// # Top of stack words are: Callee, RA. Exchange Callee and RA.
190-
// pushl 4(%esp) # Push callee
191-
// pushl 4(%esp) # Push RA
192-
// popl 8(%esp) # Pop RA to final RA
193-
// popl (%esp) # Pop callee to next top of stack
194-
// retl # Ret to callee
176+
// __llvm_retpoline_edi:
177+
// ... # Same setup
178+
// movl %edi, (%esp)
179+
// retl
195180
if (MF.getName() == EAXThunkName)
196181
populateThunk(MF, X86::EAX);
197182
else if (MF.getName() == ECXThunkName)
198183
populateThunk(MF, X86::ECX);
199184
else if (MF.getName() == EDXThunkName)
200185
populateThunk(MF, X86::EDX);
201-
else if (MF.getName() == PushThunkName)
202-
populateThunk(MF);
186+
else if (MF.getName() == EDIThunkName)
187+
populateThunk(MF, X86::EDI);
203188
else
204189
llvm_unreachable("Invalid thunk name on x86-32!");
205190
}
@@ -301,11 +286,6 @@ void X86RetpolineThunks::populateThunk(MachineFunction &MF,
301286
CaptureSpec->addSuccessor(CaptureSpec);
302287

303288
CallTarget->setAlignment(4);
304-
if (Reg) {
305-
insertRegReturnAddrClobber(*CallTarget, *Reg);
306-
} else {
307-
assert(!Is64Bit && "We only support non-reg thunks on 32-bit x86!");
308-
insert32BitPushReturnAddrClobber(*CallTarget);
309-
}
289+
insertRegReturnAddrClobber(*CallTarget, *Reg);
310290
BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
311291
}
+42
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,42 @@
1+
; RUN: llc -mtriple=i686-linux < %s | FileCheck --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" %s
2+
3+
; Test 32-bit retpoline when -mregparm=3 is used. This case is interesting
4+
; because there are no available scratch registers. The Linux kernel builds
5+
; with -mregparm=3, so we need to support it. TCO should fail because we need
6+
; to restore EDI.
7+
8+
define void @call_edi(void (i32, i32, i32)* %fp) #0 {
9+
entry:
10+
tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
11+
ret void
12+
}
13+
14+
; CHECK-LABEL: call_edi:
15+
; EDI is used, so it must be saved.
16+
; CHECK: pushl %edi
17+
; CHECK-DAG: xorl %eax, %eax
18+
; CHECK-DAG: xorl %edx, %edx
19+
; CHECK-DAG: xorl %ecx, %ecx
20+
; CHECK-DAG: movl {{.*}}, %edi
21+
; CHECK: calll __llvm_retpoline_edi
22+
; CHECK: popl %edi
23+
; CHECK: retl
24+
25+
define void @edi_external(void (i32, i32, i32)* %fp) #1 {
26+
entry:
27+
tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
28+
ret void
29+
}
30+
31+
; CHECK-LABEL: edi_external:
32+
; CHECK: pushl %edi
33+
; CHECK-DAG: xorl %eax, %eax
34+
; CHECK-DAG: xorl %edx, %edx
35+
; CHECK-DAG: xorl %ecx, %ecx
36+
; CHECK-DAG: movl {{.*}}, %edi
37+
; CHECK: calll __x86_indirect_thunk_edi
38+
; CHECK: popl %edi
39+
; CHECK: retl
40+
41+
attributes #0 = { "target-features"="+retpoline" }
42+
attributes #1 = { "target-features"="+retpoline-external-thunk" }

‎llvm/test/CodeGen/X86/retpoline.ll

+5-9
Original file line number | Diff line number | Diff line change
@@ -340,10 +340,10 @@ latch:
340340
; X86-NEXT: movl %edx, (%esp)
341341
; X86-NEXT: retl
342342
;
343-
; X86-LABEL: .section .text.__llvm_retpoline_push,{{.*}},__llvm_retpoline_push,comdat
344-
; X86-NEXT: .hidden __llvm_retpoline_push
345-
; X86-NEXT: .weak __llvm_retpoline_push
346-
; X86: __llvm_retpoline_push:
343+
; X86-LABEL: .section .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
344+
; X86-NEXT: .hidden __llvm_retpoline_edi
345+
; X86-NEXT: .weak __llvm_retpoline_edi
346+
; X86: __llvm_retpoline_edi:
347347
; X86-NEXT: # {{.*}} # %entry
348348
; X86-NEXT: calll [[CALL_TARGET:.*]]
349349
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
@@ -355,11 +355,7 @@ latch:
355355
; X86-NEXT: .p2align 4, 0x90
356356
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
357357
; X86-NEXT: # %entry
358-
; X86-NEXT: addl $4, %esp
359-
; X86-NEXT: pushl 4(%esp)
360-
; X86-NEXT: pushl 4(%esp)
361-
; X86-NEXT: popl 8(%esp)
362-
; X86-NEXT: popl (%esp)
358+
; X86-NEXT: movl %edi, (%esp)
363359
; X86-NEXT: retl
364360

365361

0 commit comments

Comments
 (0)
Please sign in to comment.