Index: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp +++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp @@ -608,7 +608,6 @@ int64_t RDXShadowSlot = 0; // If inlining in the prolog, save RCX and RDX. - // Future optimization: don't save or restore if not live in. if (InProlog) { // Compute the offsets. We need to account for things already // pushed onto the stack at this point: return address, frame @@ -616,15 +615,30 @@ X86MachineFunctionInfo *X86FI = MF.getInfo(); const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize(); const bool HasFP = hasFP(MF); - RCXShadowSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0); - RDXShadowSlot = RCXShadowSlot + 8; - // Emit the saves. - addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, - RCXShadowSlot) - .addReg(X86::RCX); - addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, - RDXShadowSlot) - .addReg(X86::RDX); + + // Check if we need to spill RCX and/or RDX. + // Here we assume that no earlier prologue instruction changes RCX and/or + // RDX, so checking the block live-ins is enough. + const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX); + const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX); + int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0); + // Assign the initial slot to both registers, then change RDX's slot if both + // need to be spilled. + if (IsRCXLiveIn) + RCXShadowSlot = InitSlot; + if (IsRDXLiveIn) + RDXShadowSlot = InitSlot; + if (IsRDXLiveIn && IsRCXLiveIn) + RDXShadowSlot += 8; + // Emit the saves if needed. + if (IsRCXLiveIn) + addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, + RCXShadowSlot) + .addReg(X86::RCX); + if (IsRDXLiveIn) + addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, + RDXShadowSlot) + .addReg(X86::RDX); } else { // Not in the prolog. Copy RAX to a virtual reg. BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX); @@ -661,6 +675,7 @@ BuildMI(&MBB, DL, TII.get(X86::JAE_1)).addMBB(ContinueMBB); // Add code to roundMBB to round the final stack pointer to a page boundary. + RoundMBB->addLiveIn(FinalReg); BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg) .addReg(FinalReg) .addImm(PageMask); @@ -677,6 +692,7 @@ .addMBB(LoopMBB); } + LoopMBB->addLiveIn(JoinReg); addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg, false, -PageSize); @@ -688,6 +704,8 @@ .addImm(0) .addReg(0) .addImm(0); + + LoopMBB->addLiveIn(RoundedReg); BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr)) .addReg(RoundedReg) .addReg(ProbeReg); @@ -697,16 +715,19 @@ // If in prolog, restore RDX and RCX. if (InProlog) { - addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm), - X86::RCX), - X86::RSP, false, RCXShadowSlot); - addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm), - X86::RDX), - X86::RSP, false, RDXShadowSlot); + if (RCXShadowSlot) // It means we spilled RCX in the prologue. + addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, + TII.get(X86::MOV64rm), X86::RCX), + X86::RSP, false, RCXShadowSlot); + if (RDXShadowSlot) // It means we spilled RDX in the prologue. + addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, + TII.get(X86::MOV64rm), X86::RDX), + X86::RSP, false, RDXShadowSlot); } // Now that the probing is done, add code to continueMBB to update // the stack pointer for real. + ContinueMBB->addLiveIn(SizeReg); BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP) .addReg(X86::RSP) .addReg(SizeReg); @@ -734,8 +755,6 @@ CMBBI->setFlag(MachineInstr::FrameSetup); } } - - // Possible TODO: physreg liveness for InProlog case. } void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, Index: llvm/trunk/test/CodeGen/X86/win_coreclr_chkstk.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/win_coreclr_chkstk.ll +++ llvm/trunk/test/CodeGen/X86/win_coreclr_chkstk.ll @@ -10,8 +10,6 @@ ; WIN_X64-LABEL:main4k: ; WIN_X64: # %bb.0: ; WIN_X64: movl $4096, %eax -; WIN_X64: movq %rcx, 8(%rsp) -; WIN_X64: movq %rdx, 16(%rsp) ; WIN_X64: xorq %rcx, %rcx ; WIN_X64: movq %rsp, %rdx ; WIN_X64: subq %rax, %rdx @@ -27,8 +25,6 @@ ; WIN_X64: cmpq %rcx, %rdx ; WIN_X64: jne .LBB0_2 ; WIN_X64:.LBB0_3: -; WIN_X64: movq 8(%rsp), %rcx -; WIN_X64: movq 16(%rsp), %rdx ; WIN_X64: subq %rax, %rsp ; WIN_X64: xorl %eax, %eax ; WIN_X64: addq $4096, %rsp @@ -45,7 +41,6 @@ define i32 @main4k_frame() nounwind "no-frame-pointer-elim"="true" { entry: ; WIN_X64-LABEL:main4k_frame: -; WIN_X64: movq %rcx, 16(%rsp) ; WIN_X64: movq %gs:16, %rcx ; LINUX-LABEL:main4k_frame: ; LINUX-NOT: movq %gs:16, %rcx @@ -58,7 +53,6 @@ ; Case with INT args define i32 @main4k_intargs(i32 %x, i32 %y) nounwind { entry: -; WIN_X64: movq %rcx, 8(%rsp) ; WIN_X64: movq %gs:16, %rcx ; LINUX-NOT: movq %gs:16, %rcx ; LINUX: retq @@ -71,7 +65,6 @@ ; Case with FP regs define i32 @main4k_fpargs(double %x, double %y) nounwind { entry: -; WIN_X64: movq %rcx, 8(%rsp) ; WIN_X64: movq %gs:16, %rcx ; LINUX-NOT: movq %gs:16, %rcx ; LINUX: retq Index: llvm/trunk/test/CodeGen/X86/win_coreclr_chkstk_liveins.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/win_coreclr_chkstk_liveins.mir +++ llvm/trunk/test/CodeGen/X86/win_coreclr_chkstk_liveins.mir @@ -0,0 +1,24 @@ +# RUN: llc -verify-machineinstrs %s -run-pass prologepilog -mtriple=x86_64-pc-win32-coreclr -o - | FileCheck %s +... +--- +name: main4k +# CHECK-LABEL: name: main4k + +alignment: 4 +tracksRegLiveness: true +frameInfo: + maxAlignment: 8 +stack: + - { id: 0, size: 4096, alignment: 1, stack-id: 0 } +body: | + bb.0.entry: + $eax = IMPLICIT_DEF + RET 0, killed $eax + + ; CHECK: bb.1.entry: + ; CHECK: liveins: $rdx + ; CHECK: bb.2.entry: + ; CHECK: liveins: $rcx, $rdx + ; CHECK: bb.3.entry: + ; CHECK: liveins: $rax +...