Skip to content

Commit 2f055f0

Browse files
committedFeb 26, 2019
[X86] Fix bug in x86_intrcc with arg copy elision
Summary: Use a custom calling convention handler for interrupts instead of fixing up the locations in LowerMemArgument. This way, the offsets are correct when constructed and we don't need to account for them in as many places. Depends on D56883 Replaces D56275 Reviewers: craig.topper, phil-opp Subscribers: hiraditya, llvm-commits Differential Revision: https://reviews.llvm.org/D56944 llvm-svn: 354837
1 parent b4e16e6 commit 2f055f0

File tree

6 files changed

+192
-45
lines changed

6 files changed

+192
-45
lines changed
 

‎llvm/lib/Target/X86/X86CallingConv.cpp

+40
Original file line numberDiff line numberDiff line change
@@ -287,5 +287,45 @@ static bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
287287
return true;
288288
}
289289

290+
/// X86 interrupt handlers can only take one or two stack arguments, but if
291+
/// there are two arguments, they are in the opposite order from the standard
292+
/// convention. Therefore, we have to look at the argument count up front before
293+
/// allocating stack for each argument.
294+
static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
295+
CCValAssign::LocInfo &LocInfo,
296+
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
297+
const MachineFunction &MF = State.getMachineFunction();
298+
size_t ArgCount = State.getMachineFunction().getFunction().arg_size();
299+
bool Is64Bit = static_cast<const X86Subtarget &>(MF.getSubtarget()).is64Bit();
300+
unsigned SlotSize = Is64Bit ? 8 : 4;
301+
unsigned Offset;
302+
if (ArgCount == 1 && ValNo == 0) {
303+
// If we have one argument, the argument is five stack slots big, at fixed
304+
// offset zero.
305+
Offset = State.AllocateStack(5 * SlotSize, 4);
306+
} else if (ArgCount == 2 && ValNo == 0) {
307+
// If we have two arguments, the stack slot is *after* the error code
308+
// argument. Pretend it doesn't consume stack space, and account for it when
309+
// we assign the second argument.
310+
Offset = SlotSize;
311+
} else if (ArgCount == 2 && ValNo == 1) {
312+
// If this is the second of two arguments, it must be the error code. It
313+
// appears first on the stack, and is then followed by the five slot
314+
// interrupt struct.
315+
Offset = 0;
316+
(void)State.AllocateStack(6 * SlotSize, 4);
317+
} else {
318+
report_fatal_error("unsupported x86 interrupt prototype");
319+
}
320+
321+
// FIXME: This should be accounted for in
322+
// X86FrameLowering::getFrameIndexReference, not here.
323+
if (Is64Bit && ArgCount == 2)
324+
Offset += SlotSize;
325+
326+
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
327+
return true;
328+
}
329+
290330
// Provides entry points of CC_X86 and RetCC_X86.
291331
#include "X86GenCallingConv.inc"

‎llvm/lib/Target/X86/X86CallingConv.td

+2-10
Original file line numberDiff line numberDiff line change
@@ -985,14 +985,6 @@ def CC_Intel_OCL_BI : CallingConv<[
985985
CCDelegateTo<CC_X86_32_C>
986986
]>;
987987

988-
def CC_X86_32_Intr : CallingConv<[
989-
CCAssignToStack<4, 4>
990-
]>;
991-
992-
def CC_X86_64_Intr : CallingConv<[
993-
CCAssignToStack<8, 8>
994-
]>;
995-
996988
//===----------------------------------------------------------------------===//
997989
// X86 Root Argument Calling Conventions
998990
//===----------------------------------------------------------------------===//
@@ -1001,7 +993,7 @@ def CC_X86_64_Intr : CallingConv<[
1001993
def CC_X86_32 : CallingConv<[
1002994
// X86_INTR calling convention is valid in MCU target and should override the
1003995
// MCU calling convention. Thus, this should be checked before isTargetMCU().
1004-
CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_32_Intr>>,
996+
CCIfCC<"CallingConv::X86_INTR", CCCustom<"CC_X86_Intr">>,
1005997
CCIfSubtarget<"isTargetMCU()", CCDelegateTo<CC_X86_32_MCU>>,
1006998
CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
1007999
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win32_VectorCall>>,
@@ -1029,7 +1021,7 @@ def CC_X86_64 : CallingConv<[
10291021
CCIfCC<"CallingConv::X86_RegCall",
10301022
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_RegCall>>>,
10311023
CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_SysV64_RegCall>>,
1032-
CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_64_Intr>>,
1024+
CCIfCC<"CallingConv::X86_INTR", CCCustom<"CC_X86_Intr">>,
10331025

10341026
// Mingw64 and native Win64 use Win64 CC
10351027
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,

‎llvm/lib/Target/X86/X86FrameLowering.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -1773,6 +1773,15 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
17731773
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
17741774
int64_t FPDelta = 0;
17751775

1776+
// In an x86 interrupt, remove the offset we added to account for the return
1777+
// address from any stack object allocated in the caller's frame. Interrupts
1778+
// do not have a standard return address. Fixed objects in the current frame,
1779+
// such as SSE register spills, should not get this treatment.
1780+
if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
1781+
Offset >= 0) {
1782+
Offset += getOffsetOfLocalArea();
1783+
}
1784+
17761785
if (IsWin64Prologue) {
17771786
assert(!MFI.hasCalls() || (StackSize % 16) == 8);
17781787

‎llvm/lib/Target/X86/X86ISelLowering.cpp

-33
Original file line numberDiff line numberDiff line change
@@ -2976,22 +2976,6 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
29762976
else
29772977
ValVT = VA.getValVT();
29782978

2979-
// Calculate SP offset of interrupt parameter, re-arrange the slot normally
2980-
// taken by a return address.
2981-
int Offset = 0;
2982-
if (CallConv == CallingConv::X86_INTR) {
2983-
// X86 interrupts may take one or two arguments.
2984-
// On the stack there will be no return address as in regular call.
2985-
// Offset of last argument need to be set to -4/-8 bytes.
2986-
// Where offset of the first argument out of two, should be set to 0 bytes.
2987-
Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
2988-
if (Subtarget.is64Bit() && Ins.size() == 2) {
2989-
// The stack pointer needs to be realigned for 64 bit handlers with error
2990-
// code, so the argument offset changes by 8 bytes.
2991-
Offset += 8;
2992-
}
2993-
}
2994-
29952979
// FIXME: For now, all byval parameter objects are marked mutable. This can be
29962980
// changed with more analysis.
29972981
// In case of tail call optimization mark all arguments mutable. Since they
@@ -3004,10 +2988,6 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
30042988
// can be improved with deeper analysis.
30052989
int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
30062990
/*isAliased=*/true);
3007-
// Adjust SP offset of interrupt parameter.
3008-
if (CallConv == CallingConv::X86_INTR) {
3009-
MFI.setObjectOffset(FI, Offset);
3010-
}
30112991
return DAG.getFrameIndex(FI, PtrVT);
30122992
}
30132993

@@ -3062,11 +3042,6 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
30623042
MFI.setObjectSExt(FI, true);
30633043
}
30643044

3065-
// Adjust SP offset of interrupt parameter.
3066-
if (CallConv == CallingConv::X86_INTR) {
3067-
MFI.setObjectOffset(FI, Offset);
3068-
}
3069-
30703045
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
30713046
SDValue Val = DAG.getLoad(
30723047
ValVT, dl, Chain, FIN,
@@ -3156,14 +3131,6 @@ SDValue X86TargetLowering::LowerFormalArguments(
31563131
!(isVarArg && canGuaranteeTCO(CallConv)) &&
31573132
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
31583133

3159-
if (CallConv == CallingConv::X86_INTR) {
3160-
bool isLegal = Ins.size() == 1 ||
3161-
(Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
3162-
(!Is64Bit && Ins[1].VT == MVT::i32)));
3163-
if (!isLegal)
3164-
report_fatal_error("X86 interrupts may take one or two arguments");
3165-
}
3166-
31673134
// Assign locations to all of the incoming arguments.
31683135
SmallVector<CCValAssign, 16> ArgLocs;
31693136
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

‎llvm/test/CodeGen/X86/x86-32-intrcc.ll

+67-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33

44
%struct.interrupt_frame = type { i32, i32, i32, i32, i32 }
55

6-
@llvm.used = appending global [4 x i8*] [i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_no_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i32)* @test_isr_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i32)* @test_isr_clobbers to i8*), i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_x87 to i8*)], section "llvm.metadata"
6+
@sink_address = global i32* null
7+
@sink_i32 = global i32 0
8+
79

810
; Spills eax, putting original esp at +4.
911
; No stack adjustment if declared with no error code
@@ -93,3 +95,67 @@ entry:
9395
store x86_fp80 %add, x86_fp80* @f80, align 4
9496
ret void
9597
}
98+
99+
; Use a frame pointer to check the offsets. No return address, arguments start
100+
; at EBP+4.
101+
define dso_local x86_intrcc void @test_fp_1(%struct.interrupt_frame* %p) #0 {
102+
; CHECK-LABEL: test_fp_1:
103+
; CHECK: # %bb.0: # %entry
104+
; CHECK-NEXT: pushl %ebp
105+
; CHECK-NEXT: movl %esp, %ebp
106+
; CHECK: cld
107+
; CHECK-DAG: leal 4(%ebp), %[[R1:[^ ]*]]
108+
; CHECK-DAG: leal 20(%ebp), %[[R2:[^ ]*]]
109+
; CHECK: movl %[[R1]], sink_address
110+
; CHECK: movl %[[R2]], sink_address
111+
; CHECK: popl %ebp
112+
; CHECK: iretl
113+
entry:
114+
%arrayidx = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i32 0, i32 0
115+
%arrayidx2 = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i32 0, i32 4
116+
store volatile i32* %arrayidx, i32** @sink_address
117+
store volatile i32* %arrayidx2, i32** @sink_address
118+
ret void
119+
}
120+
121+
; The error code is between EBP and the interrupt_frame.
122+
define dso_local x86_intrcc void @test_fp_2(%struct.interrupt_frame* %p, i32 %err) #0 {
123+
; CHECK-LABEL: test_fp_2:
124+
; CHECK: # %bb.0: # %entry
125+
; CHECK-NEXT: pushl %ebp
126+
; CHECK-NEXT: movl %esp, %ebp
127+
; CHECK: cld
128+
; CHECK-DAG: movl 4(%ebp), %[[R3:[^ ]*]]
129+
; CHECK-DAG: leal 8(%ebp), %[[R1:[^ ]*]]
130+
; CHECK-DAG: leal 24(%ebp), %[[R2:[^ ]*]]
131+
; CHECK: movl %[[R1]], sink_address
132+
; CHECK: movl %[[R2]], sink_address
133+
; CHECK: movl %[[R3]], sink_i32
134+
; CHECK: popl %ebp
135+
; CHECK: iretl
136+
entry:
137+
%arrayidx = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i32 0, i32 0
138+
%arrayidx2 = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i32 0, i32 4
139+
store volatile i32* %arrayidx, i32** @sink_address
140+
store volatile i32* %arrayidx2, i32** @sink_address
141+
store volatile i32 %err, i32* @sink_i32
142+
ret void
143+
}
144+
145+
; Test argument copy elision when copied to a local alloca.
146+
define x86_intrcc void @test_copy_elide(%struct.interrupt_frame* %frame, i32 %err) #0 {
147+
; CHECK-LABEL: test_copy_elide:
148+
; CHECK: # %bb.0: # %entry
149+
; CHECK-NEXT: pushl %ebp
150+
; CHECK-NEXT: movl %esp, %ebp
151+
; CHECK: cld
152+
; CHECK: leal 4(%ebp), %[[R1:[^ ]*]]
153+
; CHECK: movl %[[R1]], sink_address
154+
entry:
155+
%err.addr = alloca i32, align 4
156+
store i32 %err, i32* %err.addr, align 4
157+
store volatile i32* %err.addr, i32** @sink_address
158+
ret void
159+
}
160+
161+
attributes #0 = { nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }

‎llvm/test/CodeGen/X86/x86-64-intrcc.ll

+74-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33

44
%struct.interrupt_frame = type { i64, i64, i64, i64, i64 }
55

6-
@llvm.used = appending global [4 x i8*] [i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_no_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i64)* @test_isr_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i64)* @test_isr_clobbers to i8*), i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_x87 to i8*)], section "llvm.metadata"
6+
@sink_address = global i64* null
7+
@sink_i32 = global i64 0
78

89
; Spills rax, putting original esp at +8.
910
; No stack adjustment if declared with no error code
@@ -105,3 +106,75 @@ entry:
105106
store x86_fp80 %add, x86_fp80* @f80, align 4
106107
ret void
107108
}
109+
110+
; Use a frame pointer to check the offsets. No return address, arguments start
111+
; at RBP+4.
112+
define dso_local x86_intrcc void @test_fp_1(%struct.interrupt_frame* %p) #0 {
113+
; CHECK-LABEL: test_fp_1:
114+
; CHECK: # %bb.0: # %entry
115+
; CHECK-NEXT: pushq %rbp
116+
; CHECK-NEXT: movq %rsp, %rbp
117+
; CHECK: cld
118+
; CHECK-DAG: leaq 8(%rbp), %[[R1:[^ ]*]]
119+
; CHECK-DAG: leaq 40(%rbp), %[[R2:[^ ]*]]
120+
; CHECK: movq %[[R1]], sink_address
121+
; CHECK: movq %[[R2]], sink_address
122+
; CHECK: popq %rbp
123+
; CHECK: iretq
124+
entry:
125+
%arrayidx = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i64 0, i32 0
126+
%arrayidx2 = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i64 0, i32 4
127+
store volatile i64* %arrayidx, i64** @sink_address
128+
store volatile i64* %arrayidx2, i64** @sink_address
129+
ret void
130+
}
131+
132+
; The error code is between RBP and the interrupt_frame.
133+
define dso_local x86_intrcc void @test_fp_2(%struct.interrupt_frame* %p, i64 %err) #0 {
134+
; CHECK-LABEL: test_fp_2:
135+
; CHECK: # %bb.0: # %entry
136+
; This RAX push is just to align the stack.
137+
; CHECK-NEXT: pushq %rax
138+
; CHECK-NEXT: pushq %rbp
139+
; CHECK-NEXT: movq %rsp, %rbp
140+
; CHECK: cld
141+
; CHECK-DAG: movq 16(%rbp), %[[R3:[^ ]*]]
142+
; CHECK-DAG: leaq 24(%rbp), %[[R1:[^ ]*]]
143+
; CHECK-DAG: leaq 56(%rbp), %[[R2:[^ ]*]]
144+
; CHECK: movq %[[R1]], sink_address(%rip)
145+
; CHECK: movq %[[R2]], sink_address(%rip)
146+
; CHECK: movq %[[R3]], sink_i32(%rip)
147+
; CHECK: popq %rbp
148+
; Pop off both the error code and the 8 byte alignment adjustment from the
149+
; prologue.
150+
; CHECK: addq $16, %rsp
151+
; CHECK: iretq
152+
entry:
153+
%arrayidx = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i64 0, i32 0
154+
%arrayidx2 = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i64 0, i32 4
155+
store volatile i64* %arrayidx, i64** @sink_address
156+
store volatile i64* %arrayidx2, i64** @sink_address
157+
store volatile i64 %err, i64* @sink_i32
158+
ret void
159+
}
160+
161+
; Test argument copy elision when copied to a local alloca.
162+
define x86_intrcc void @test_copy_elide(%struct.interrupt_frame* %frame, i64 %err) #0 {
163+
; CHECK-LABEL: test_copy_elide:
164+
; CHECK: # %bb.0: # %entry
165+
; This RAX push is just to align the stack.
166+
; CHECK-NEXT: pushq %rax
167+
; CHECK-NEXT: pushq %rbp
168+
; CHECK-NEXT: movq %rsp, %rbp
169+
; CHECK: cld
170+
; CHECK: leaq 16(%rbp), %[[R1:[^ ]*]]
171+
; CHECK: movq %[[R1]], sink_address(%rip)
172+
entry:
173+
%err.addr = alloca i64, align 4
174+
store i64 %err, i64* %err.addr, align 4
175+
store volatile i64* %err.addr, i64** @sink_address
176+
ret void
177+
}
178+
179+
180+
attributes #0 = { nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }

0 commit comments

Comments
 (0)
Please sign in to comment.