Index: llvm/trunk/lib/Target/X86/X86CallingConv.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86CallingConv.cpp +++ llvm/trunk/lib/Target/X86/X86CallingConv.cpp @@ -287,5 +287,45 @@ return true; } +/// X86 interrupt handlers can only take one or two stack arguments, but if +/// there are two arguments, they are in the opposite order from the standard +/// convention. Therefore, we have to look at the argument count up front before +/// allocating stack for each argument. +static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + const MachineFunction &MF = State.getMachineFunction(); + size_t ArgCount = State.getMachineFunction().getFunction().arg_size(); + bool Is64Bit = static_cast(MF.getSubtarget()).is64Bit(); + unsigned SlotSize = Is64Bit ? 8 : 4; + unsigned Offset; + if (ArgCount == 1 && ValNo == 0) { + // If we have one argument, the argument is five stack slots big, at fixed + // offset zero. + Offset = State.AllocateStack(5 * SlotSize, 4); + } else if (ArgCount == 2 && ValNo == 0) { + // If we have two arguments, the stack slot is *after* the error code + // argument. Pretend it doesn't consume stack space, and account for it when + // we assign the second argument. + Offset = SlotSize; + } else if (ArgCount == 2 && ValNo == 1) { + // If this is the second of two arguments, it must be the error code. It + // appears first on the stack, and is then followed by the five slot + // interrupt struct. + Offset = 0; + (void)State.AllocateStack(6 * SlotSize, 4); + } else { + report_fatal_error("unsupported x86 interrupt prototype"); + } + + // FIXME: This should be accounted for in + // X86FrameLowering::getFrameIndexReference, not here. 
+ if (Is64Bit && ArgCount == 2) + Offset += SlotSize; + + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return true; +} + // Provides entry points of CC_X86 and RetCC_X86. #include "X86GenCallingConv.inc" Index: llvm/trunk/lib/Target/X86/X86CallingConv.td =================================================================== --- llvm/trunk/lib/Target/X86/X86CallingConv.td +++ llvm/trunk/lib/Target/X86/X86CallingConv.td @@ -985,14 +985,6 @@ CCDelegateTo ]>; -def CC_X86_32_Intr : CallingConv<[ - CCAssignToStack<4, 4> -]>; - -def CC_X86_64_Intr : CallingConv<[ - CCAssignToStack<8, 8> -]>; - //===----------------------------------------------------------------------===// // X86 Root Argument Calling Conventions //===----------------------------------------------------------------------===// @@ -1001,7 +993,7 @@ def CC_X86_32 : CallingConv<[ // X86_INTR calling convention is valid in MCU target and should override the // MCU calling convention. Thus, this should be checked before isTargetMCU(). 
- CCIfCC<"CallingConv::X86_INTR", CCDelegateTo>, + CCIfCC<"CallingConv::X86_INTR", CCCustom<"CC_X86_Intr">>, CCIfSubtarget<"isTargetMCU()", CCDelegateTo>, CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo>, CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo>, @@ -1029,7 +1021,7 @@ CCIfCC<"CallingConv::X86_RegCall", CCIfSubtarget<"isTargetWin64()", CCDelegateTo>>, CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo>, - CCIfCC<"CallingConv::X86_INTR", CCDelegateTo>, + CCIfCC<"CallingConv::X86_INTR", CCCustom<"CC_X86_Intr">>, // Mingw64 and native Win64 use Win64 CC CCIfSubtarget<"isTargetWin64()", CCDelegateTo>, Index: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp +++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp @@ -1773,6 +1773,15 @@ bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); int64_t FPDelta = 0; + // In an x86 interrupt, remove the offset we added to account for the return + // address from any stack object allocated in the caller's frame. Interrupts + // do not have a standard return address. Fixed objects in the current frame, + // such as SSE register spills, should not get this treatment. + if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR && + Offset >= 0) { + Offset += getOffsetOfLocalArea(); + } + if (IsWin64Prologue) { assert(!MFI.hasCalls() || (StackSize % 16) == 8); Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -2976,22 +2976,6 @@ else ValVT = VA.getValVT(); - // Calculate SP offset of interrupt parameter, re-arrange the slot normally - // taken by a return address. - int Offset = 0; - if (CallConv == CallingConv::X86_INTR) { - // X86 interrupts may take one or two arguments. 
- // On the stack there will be no return address as in regular call. - // Offset of last argument need to be set to -4/-8 bytes. - // Where offset of the first argument out of two, should be set to 0 bytes. - Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1); - if (Subtarget.is64Bit() && Ins.size() == 2) { - // The stack pointer needs to be realigned for 64 bit handlers with error - // code, so the argument offset changes by 8 bytes. - Offset += 8; - } - } - // FIXME: For now, all byval parameter objects are marked mutable. This can be // changed with more analysis. // In case of tail call optimization mark all arguments mutable. Since they @@ -3004,10 +2988,6 @@ // can be improved with deeper analysis. int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable, /*isAliased=*/true); - // Adjust SP offset of interrupt parameter. - if (CallConv == CallingConv::X86_INTR) { - MFI.setObjectOffset(FI, Offset); - } return DAG.getFrameIndex(FI, PtrVT); } @@ -3062,11 +3042,6 @@ MFI.setObjectSExt(FI, true); } - // Adjust SP offset of interrupt parameter. - if (CallConv == CallingConv::X86_INTR) { - MFI.setObjectOffset(FI, Offset); - } - SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getLoad( ValVT, dl, Chain, FIN, @@ -3156,14 +3131,6 @@ !(isVarArg && canGuaranteeTCO(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"); - if (CallConv == CallingConv::X86_INTR) { - bool isLegal = Ins.size() == 1 || - (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) || - (!Is64Bit && Ins[1].VT == MVT::i32))); - if (!isLegal) - report_fatal_error("X86 interrupts may take one or two arguments"); - } - // Assign locations to all of the incoming arguments. 
SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); Index: llvm/trunk/test/CodeGen/X86/x86-32-intrcc.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/x86-32-intrcc.ll +++ llvm/trunk/test/CodeGen/X86/x86-32-intrcc.ll @@ -3,7 +3,9 @@ %struct.interrupt_frame = type { i32, i32, i32, i32, i32 } -@llvm.used = appending global [4 x i8*] [i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_no_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i32)* @test_isr_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i32)* @test_isr_clobbers to i8*), i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_x87 to i8*)], section "llvm.metadata" +@sink_address = global i32* null +@sink_i32 = global i32 0 + ; Spills eax, putting original esp at +4. ; No stack adjustment if declared with no error code @@ -93,3 +95,67 @@ store x86_fp80 %add, x86_fp80* @f80, align 4 ret void } + +; Use a frame pointer to check the offsets. No return address, arguments start +; at EBP+4. +define dso_local x86_intrcc void @test_fp_1(%struct.interrupt_frame* %p) #0 { + ; CHECK-LABEL: test_fp_1: + ; CHECK: # %bb.0: # %entry + ; CHECK-NEXT: pushl %ebp + ; CHECK-NEXT: movl %esp, %ebp + ; CHECK: cld + ; CHECK-DAG: leal 4(%ebp), %[[R1:[^ ]*]] + ; CHECK-DAG: leal 20(%ebp), %[[R2:[^ ]*]] + ; CHECK: movl %[[R1]], sink_address + ; CHECK: movl %[[R2]], sink_address + ; CHECK: popl %ebp + ; CHECK: iretl +entry: + %arrayidx = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i32 0, i32 0 + %arrayidx2 = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i32 0, i32 4 + store volatile i32* %arrayidx, i32** @sink_address + store volatile i32* %arrayidx2, i32** @sink_address + ret void +} + +; The error code is between EBP and the interrupt_frame. 
+define dso_local x86_intrcc void @test_fp_2(%struct.interrupt_frame* %p, i32 %err) #0 { + ; CHECK-LABEL: test_fp_2: + ; CHECK: # %bb.0: # %entry + ; CHECK-NEXT: pushl %ebp + ; CHECK-NEXT: movl %esp, %ebp + ; CHECK: cld + ; CHECK-DAG: movl 4(%ebp), %[[R3:[^ ]*]] + ; CHECK-DAG: leal 8(%ebp), %[[R1:[^ ]*]] + ; CHECK-DAG: leal 24(%ebp), %[[R2:[^ ]*]] + ; CHECK: movl %[[R1]], sink_address + ; CHECK: movl %[[R2]], sink_address + ; CHECK: movl %[[R3]], sink_i32 + ; CHECK: popl %ebp + ; CHECK: iretl +entry: + %arrayidx = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i32 0, i32 0 + %arrayidx2 = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i32 0, i32 4 + store volatile i32* %arrayidx, i32** @sink_address + store volatile i32* %arrayidx2, i32** @sink_address + store volatile i32 %err, i32* @sink_i32 + ret void +} + +; Test argument copy elision when copied to a local alloca. +define x86_intrcc void @test_copy_elide(%struct.interrupt_frame* %frame, i32 %err) #0 { + ; CHECK-LABEL: test_copy_elide: + ; CHECK: # %bb.0: # %entry + ; CHECK-NEXT: pushl %ebp + ; CHECK-NEXT: movl %esp, %ebp + ; CHECK: cld + ; CHECK: leal 4(%ebp), %[[R1:[^ ]*]] + ; CHECK: movl %[[R1]], sink_address +entry: + %err.addr = alloca i32, align 4 + store i32 %err, i32* %err.addr, align 4 + store volatile i32* %err.addr, i32** @sink_address + ret void +} + +attributes #0 = { nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" } Index: llvm/trunk/test/CodeGen/X86/x86-64-intrcc.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/x86-64-intrcc.ll +++ llvm/trunk/test/CodeGen/X86/x86-64-intrcc.ll @@ -3,7 +3,8 @@ %struct.interrupt_frame = type { i64, i64, i64, i64, i64 } -@llvm.used = appending global [4 x i8*] [i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_no_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i64)* @test_isr_ecode to i8*), i8* bitcast (void 
(%struct.interrupt_frame*, i64)* @test_isr_clobbers to i8*), i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_x87 to i8*)], section "llvm.metadata" +@sink_address = global i64* null +@sink_i32 = global i64 0 ; Spills rax, putting original esp at +8. ; No stack adjustment if declared with no error code @@ -105,3 +106,75 @@ store x86_fp80 %add, x86_fp80* @f80, align 4 ret void } + +; Use a frame pointer to check the offsets. No return address, arguments start +; at RBP+8. +define dso_local x86_intrcc void @test_fp_1(%struct.interrupt_frame* %p) #0 { + ; CHECK-LABEL: test_fp_1: + ; CHECK: # %bb.0: # %entry + ; CHECK-NEXT: pushq %rbp + ; CHECK-NEXT: movq %rsp, %rbp + ; CHECK: cld + ; CHECK-DAG: leaq 8(%rbp), %[[R1:[^ ]*]] + ; CHECK-DAG: leaq 40(%rbp), %[[R2:[^ ]*]] + ; CHECK: movq %[[R1]], sink_address + ; CHECK: movq %[[R2]], sink_address + ; CHECK: popq %rbp + ; CHECK: iretq +entry: + %arrayidx = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i64 0, i32 0 + %arrayidx2 = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i64 0, i32 4 + store volatile i64* %arrayidx, i64** @sink_address + store volatile i64* %arrayidx2, i64** @sink_address + ret void +} + +; The error code is between RBP and the interrupt_frame. +define dso_local x86_intrcc void @test_fp_2(%struct.interrupt_frame* %p, i64 %err) #0 { + ; CHECK-LABEL: test_fp_2: + ; CHECK: # %bb.0: # %entry + ; This RAX push is just to align the stack. + ; CHECK-NEXT: pushq %rax + ; CHECK-NEXT: pushq %rbp + ; CHECK-NEXT: movq %rsp, %rbp + ; CHECK: cld + ; CHECK-DAG: movq 16(%rbp), %[[R3:[^ ]*]] + ; CHECK-DAG: leaq 24(%rbp), %[[R1:[^ ]*]] + ; CHECK-DAG: leaq 56(%rbp), %[[R2:[^ ]*]] + ; CHECK: movq %[[R1]], sink_address(%rip) + ; CHECK: movq %[[R2]], sink_address(%rip) + ; CHECK: movq %[[R3]], sink_i32(%rip) + ; CHECK: popq %rbp + ; Pop off both the error code and the 8 byte alignment adjustment from the + ; prologue. 
+ ; CHECK: addq $16, %rsp + ; CHECK: iretq +entry: + %arrayidx = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i64 0, i32 0 + %arrayidx2 = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i64 0, i32 4 + store volatile i64* %arrayidx, i64** @sink_address + store volatile i64* %arrayidx2, i64** @sink_address + store volatile i64 %err, i64* @sink_i32 + ret void +} + +; Test argument copy elision when copied to a local alloca. +define x86_intrcc void @test_copy_elide(%struct.interrupt_frame* %frame, i64 %err) #0 { + ; CHECK-LABEL: test_copy_elide: + ; CHECK: # %bb.0: # %entry + ; This RAX push is just to align the stack. + ; CHECK-NEXT: pushq %rax + ; CHECK-NEXT: pushq %rbp + ; CHECK-NEXT: movq %rsp, %rbp + ; CHECK: cld + ; CHECK: leaq 16(%rbp), %[[R1:[^ ]*]] + ; CHECK: movq %[[R1]], sink_address(%rip) +entry: + %err.addr = alloca i64, align 4 + store i64 %err, i64* %err.addr, align 4 + store volatile i64* %err.addr, i64** @sink_address + ret void +} + + +attributes #0 = { nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }