Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1378,6 +1378,14 @@ TLI.getFrameIndexTy(DAG.getDataLayout())); } + // If this is a function argument at a static frame index, generate it as + // the frame index. + if (const Argument *Arg = dyn_cast<Argument>(V)) { + int FI = FuncInfo.getArgumentFrameIndex(Arg); + if (FI != INT_MAX) + return DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout())); + } + // If this is an instruction which fast-isel has deferred, select it now. if (const Instruction *Inst = dyn_cast<Instruction>(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); @@ -9235,8 +9243,8 @@ continue; // Note down frame index. - if (FrameIndexSDNode *FI = - dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()); + if (FI) FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues), @@ -9280,7 +9288,8 @@ continue; } } - if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) { + if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel) && !FI) { + // Excluding FrameIndex as it is accessible in other BB without a VReg. FuncInfo->InitializeRegForValue(&Arg); SDB->CopyToExportRegsIfNeeded(&Arg); } Index: test/CodeGen/AMDGPU/frame-index-elimination.ll =================================================================== --- test/CodeGen/AMDGPU/frame-index-elimination.ll +++ test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -129,21 +129,21 @@ ; FrameIndex is hidden behind a CopyFromReg in the second block. 
; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_nonentry_block: +; GCN: s_and_saveexec_b64 + +; CI: buffer_load_dword v1, off, s[0:3], s5 offset:8{{$}} + +; GFX9: buffer_load_dword v1, off, s[0:3], s5 offset:8{{$}} + ; GCN: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s5, s4 ; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], [[SUB_OFFSET]], 6 ; CI: v_add_i32_e64 [[ADD:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 4, [[SHIFT]] +; CI: v_add_i32_e32 v0, vcc, 4, [[ADD]] ; GFX9: v_lshrrev_b32_e64 [[SHIFT:v[0-9]+]], 6, [[SUB_OFFSET]] ; GFX9: v_add_u32_e32 [[ADD:v[0-9]+]], 4, [[SHIFT]] - -; GCN: s_and_saveexec_b64 - -; CI: v_add_i32_e32 v0, vcc, 4, [[ADD]] -; CI: buffer_load_dword v1, v1, s[0:3], s4 offen offset:4{{$}} - ; GFX9: v_add_u32_e32 v0, 4, [[ADD]] -; GFX9: buffer_load_dword v1, v{{[0-9]+}}, s[0:3], s4 offen offset:4{{$}} ; GCN: ds_write_b32 define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 } addrspace(5)* byval %arg0, i32 %arg2) #0 { Index: test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll =================================================================== --- test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll +++ /dev/null @@ -1,141 +0,0 @@ -; RUN: llc < %s -O0 -regalloc=fast -relocation-model=pic -disable-fp-elim | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" -target triple = "i386-apple-darwin10.0.0" - -%struct.S = type { [2 x i8*] } - -@_ZTIi = external constant i8* ; [#uses=1] -@.str = internal constant [4 x i8] c"%p\0A\00" ; <[4 x i8]*> [#uses=1] -@llvm.used = appending global [1 x i8*] [i8* bitcast (i8* (%struct.S*, i32, %struct.S*)* @_Z4test1SiS_ to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] - -; Verify that %s1 gets spilled before the call. 
-; CHECK: Z4test1SiS -; CHECK: leal 8(%ebp), %[[reg:[^ ]*]] -; CHECK: movl %[[reg]],{{.*}}(%ebp) ## 4-byte Spill -; CHECK: calll __Z6throwsv - -define i8* @_Z4test1SiS_(%struct.S* byval %s1, i32 %n, %struct.S* byval %s2) ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %retval = alloca i8*, align 4 ; [#uses=2] - %n.addr = alloca i32, align 4 ; [#uses=1] - %_rethrow = alloca i8* ; [#uses=4] - %0 = alloca i32, align 4 ; [#uses=1] - %cleanup.dst = alloca i32 ; [#uses=3] - %cleanup.dst7 = alloca i32 ; [#uses=6] - store i32 %n, i32* %n.addr - invoke void @_Z6throwsv() - to label %invoke.cont unwind label %try.handler - -invoke.cont: ; preds = %entry - store i32 1, i32* %cleanup.dst7 - br label %finally - -terminate.handler: ; preds = %match.end - %1 = landingpad { i8*, i32 } - cleanup - call void @_ZSt9terminatev() noreturn nounwind - unreachable - -try.handler: ; preds = %entry - %exc1.ptr = landingpad { i8*, i32 } - catch i8* null - %exc1 = extractvalue { i8*, i32 } %exc1.ptr, 0 - %selector = extractvalue { i8*, i32 } %exc1.ptr, 1 - %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) ; [#uses=1] - %3 = icmp eq i32 %selector, %2 ; [#uses=1] - br i1 %3, label %match, label %catch.next - -match: ; preds = %try.handler - %4 = call i8* @__cxa_begin_catch(i8* %exc1) ; [#uses=1] - %5 = bitcast i8* %4 to i32* ; [#uses=1] - %6 = load i32, i32* %5 ; [#uses=1] - store i32 %6, i32* %0 - %call = invoke i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), %struct.S* %s2) - to label %invoke.cont2 unwind label %match.handler ; [#uses=0] - -invoke.cont2: ; preds = %match - store i32 1, i32* %cleanup.dst - br label %match.end - -match.handler: ; preds = %match - %exc3 = landingpad { i8*, i32 } - cleanup - %7 = extractvalue { i8*, i32 } %exc3, 0 - store i8* %7, i8** %_rethrow - store i32 2, i32* %cleanup.dst - br label %match.end - -cleanup.pad: ; preds = %cleanup.switch - store i32 1, i32* %cleanup.dst7 - br label %finally - -cleanup.pad4: ; preds = %cleanup.switch - store i32 2, i32* %cleanup.dst7 - br label %finally - -match.end: ; preds = %match.handler, %invoke.cont2 - invoke void @__cxa_end_catch() - to label %invoke.cont5 unwind label %terminate.handler - -invoke.cont5: ; preds = %match.end - br label %cleanup.switch - -cleanup.switch: ; preds = %invoke.cont5 - %tmp = load i32, i32* %cleanup.dst ; [#uses=1] - switch i32 %tmp, label %cleanup.end [ - i32 1, label %cleanup.pad - i32 2, label %cleanup.pad4 - ] - -cleanup.end: ; preds = %cleanup.switch - store i32 2, i32* %cleanup.dst7 - br label %finally - -catch.next: ; preds = %try.handler - store i8* %exc1, i8** %_rethrow - store i32 2, i32* %cleanup.dst7 - br label %finally - -finally: ; preds = %catch.next, %cleanup.end, %cleanup.pad4, %cleanup.pad, %invoke.cont - br label %cleanup.switch9 - -cleanup.switch9: ; preds = %finally - %tmp8 = load i32, i32* %cleanup.dst7 ; [#uses=1] - switch i32 %tmp8, label %cleanup.end10 [ - i32 1, label %finally.end - i32 2, label %finally.throw - ] - -cleanup.end10: ; preds = %cleanup.switch9 - br label %finally.end - -finally.throw: ; preds = %cleanup.switch9 - %8 = load i8*, i8** %_rethrow ; [#uses=1] - call void @_Unwind_Resume_or_Rethrow(i8* %8) - unreachable - -finally.end: ; preds = %cleanup.end10, %cleanup.switch9 - %tmp11 = getelementptr inbounds %struct.S, %struct.S* %s1, i32 0, i32 0 ; <[2 x i8*]*> [#uses=1] - %arraydecay = getelementptr 
inbounds [2 x i8*], [2 x i8*]* %tmp11, i32 0, i32 0 ; [#uses=1] - %arrayidx = getelementptr inbounds i8*, i8** %arraydecay, i32 1 ; [#uses=1] - %tmp12 = load i8*, i8** %arrayidx ; [#uses=1] - store i8* %tmp12, i8** %retval - %9 = load i8*, i8** %retval ; [#uses=1] - ret i8* %9 -} - -declare void @_Z6throwsv() ssp - -declare i32 @__gxx_personality_v0(...) - -declare void @_ZSt9terminatev() - -declare void @_Unwind_Resume_or_Rethrow(i8*) - -declare i32 @llvm.eh.typeid.for(i8*) nounwind - -declare i8* @__cxa_begin_catch(i8*) - -declare i32 @printf(i8*, ...) - -declare void @__cxa_end_catch() Index: test/CodeGen/X86/statepoint-stackmap-format.ll =================================================================== --- test/CodeGen/X86/statepoint-stackmap-format.ll +++ test/CodeGen/X86/statepoint-stackmap-format.ll @@ -96,9 +96,33 @@ ret i32 %ld } +; Test that function arguments at fixed stack offset +; can be directly encoded in the stack map, without +; spilling. +%struct = type { i64, i64, i64 } + +declare void @use(%struct*) + +define void @test_fixed_arg(%struct* byval %x) gc "statepoint-example" { +; CHECK-LABEL: test_fixed_arg +; CHECK: pushq %rax +; CHECK: leaq 16(%rsp), %rdi +; Should not spill fixed stack address. +; CHECK-NOT: movq %rdi, (%rsp) +; CHECK: callq use +; CHECK: popq %rax +; CHECK: retq +entry: + br label %bb + +bb: ; preds = %entry + %statepoint_token = call token (i64, i32, void (%struct*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp0s_structsf(i64 0, i32 0, void (%struct*)* @use, i32 1, i32 0, %struct* %x, i32 0, i32 1, %struct* %x) + ret void +} declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...) declare token @llvm.experimental.gc.statepoint.p0f_isVoidi64i64i64i64i64i64i64i64f(i64, i32, void (i64, i64, i64, i64, i64, i64, i64, i64)*, i32, i32, ...) +declare token @llvm.experimental.gc.statepoint.p0f_isVoidp0s_structsf(i64, i32, void (%struct*)*, i32, i32, ...) 
declare i1 @llvm.experimental.gc.result.i1(token) declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32) #3 @@ -109,11 +133,11 @@ ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .short 0 ; Num Functions -; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 5 ; Num LargeConstants ; CHECK-NEXT: .long 0 ; Num Callsites -; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 5 ; Functions and stack size ; CHECK-NEXT: .quad test @@ -128,6 +152,9 @@ ; CHECK-NEXT: .quad test_spadj ; CHECK-NEXT: .quad 8 ; CHECK-NEXT: .quad 1 +; CHECK-NEXT: .quad test_fixed_arg +; CHECK-NEXT: .quad 8 +; CHECK-NEXT: .quad 1 ; ; test @@ -421,3 +448,59 @@ ; CHECK: .short 0 ; CHECK: .short 0 ; CHECK: .p2align 3 + +; +; test_fixed_arg + +; Statepoint ID +; CHECK-NEXT: .quad 0 + +; Instruction Offset +; CHECK-NEXT: .long .Ltmp4-test_fixed_arg + +; Reserved: +; CHECK: .short 0 + +; NumLocations: +; CHECK: .short 4 + +; StkMapRecord[0]: +; SmallConstant(0): +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .short 8 +; CHECK: .short 0 +; CHECK-NEXT: .short 0 +; CHECK: .long 0 + +; StkMapRecord[1]: +; SmallConstant(0): +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .short 8 +; CHECK: .short 0 +; CHECK-NEXT: .short 0 +; CHECK: .long 0 + +; StkMapRecord[2]: +; SmallConstant(1): +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .short 8 +; CHECK: .short 0 +; CHECK-NEXT: .short 0 +; CHECK: .long 1 + +; StkMapRecord[3]: +; Direct RSP+16 +; CHECK: .byte 2 +; CHECK-NEXT: .byte 0 +; CHECK: .short 8 +; CHECK: .short 7 +; CHECK-NEXT: .short 0 +; CHECK: .long 16 + +; No padding or LiveOuts +; CHECK: .short 0 +; CHECK: .short 0 +; CHECK: .p2align 3 Index: test/CodeGen/X86/x86-32-intrcc.ll =================================================================== --- test/CodeGen/X86/x86-32-intrcc.ll +++ test/CodeGen/X86/x86-32-intrcc.ll @@ -15,8 +15,7 @@ ; CHECK: iretl ; CHECK0-LABEL: test_isr_no_ecode: ; CHECK0: pushl %eax - ; CHECK0: leal 4(%esp), %eax - ; CHECK0: movl 8(%eax), %eax + ; CHECK0: movl 12(%esp), %eax ; 
CHECK0: popl %eax ; CHECK0: iretl %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2 @@ -41,8 +40,7 @@ ; CHECK0: pushl %ecx ; CHECK0: pushl %eax ; CHECK0: movl 8(%esp), %eax - ; CHECK0: leal 12(%esp), %ecx - ; CHECK0: movl 8(%ecx), %ecx + ; CHECK0: movl 20(%esp), %ecx ; CHECK0: popl %eax ; CHECK0: popl %ecx ; CHECK0: addl $4, %esp Index: test/CodeGen/X86/x86-64-intrcc.ll =================================================================== --- test/CodeGen/X86/x86-64-intrcc.ll +++ test/CodeGen/X86/x86-64-intrcc.ll @@ -15,8 +15,7 @@ ; CHECK: iretq ; CHECK0-LABEL: test_isr_no_ecode: ; CHECK0: pushq %rax - ; CHECK0: leaq 8(%rsp), %rax - ; CHECK0: movq 16(%rax), %rax + ; CHECK0: movq 24(%rsp), %rax ; CHECK0: popq %rax ; CHECK0: iretq %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2 @@ -43,8 +42,7 @@ ; CHECK0: pushq %rax ; CHECK0: pushq %rcx ; CHECK0: movq 24(%rsp), %rax - ; CHECK0: leaq 32(%rsp), %rcx - ; CHECK0: movq 16(%rcx), %rcx + ; CHECK0: movq 48(%rsp), %rcx ; CHECK0: popq %rcx ; CHECK0: popq %rax ; CHECK0: addq $16, %rsp Index: test/DebugInfo/X86/dbg-declare-inalloca.ll =================================================================== --- test/DebugInfo/X86/dbg-declare-inalloca.ll +++ test/DebugInfo/X86/dbg-declare-inalloca.ll @@ -31,10 +31,10 @@ ; any propagation or analysis. ; CHECK: _f: # @f -; CHECK: Lfunc_begin0: -; CHECK-NOT: DEBUG_VALUE -; CHECK: [[start:Ltmp[0-9]+]]: +; CHECK: [[start:Lfunc_begin0]]: ; CHECK-NOT: DEBUG_VALUE +; DEBUG: [[start:Ltmp[0-9]+]]: +; DEBUG-NOT: DEBUG_VALUE ; CHECK: cmpl ; CHECK: calll _g ; CHECK: calll _h @@ -42,10 +42,9 @@ ; CHECK: [[end:Ltmp[0-9]+]]: ; CHECK: Lfunc_end0: -; FIXME: Optimized debug info should preserve this. 
-; DEBUG: .short 4414 # Record kind: S_LOCAL -; DEBUG: .asciz "a" -; DEBUG: .cv_def_range [[start]] [[end]] +; CHECK: .short 4414 # Record kind: S_LOCAL +; CHECK: .asciz "a" +; CHECK: .cv_def_range [[start]] [[end]] ; CHECK: .short 4414 # Record kind: S_LOCAL ; CHECK: .asciz "b"