diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -64,6 +64,18 @@
   bool ExpandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
   bool ExpandMBB(MachineBasicBlock &MBB);
+
+  /// This function expands pseudos which affect control flow.
+  /// It is done in a separate pass to simplify block navigation in the main
+  /// pass (which calls ExpandMBB).
+  bool ExpandPseudosWhichAffectControlFlow(MachineFunction &MF);
+
+  /// Expand X86::VASTART_SAVE_XMM_REGS into a set of xmm copying instructions,
+  /// placed into a separate block guarded by a check of the %al register (for
+  /// the SystemV ABI).
+  void ExpandVastartSaveXmmRegs(
+      MachineBasicBlock *MBB,
+      MachineBasicBlock::iterator VAStartPseudoInstr) const;
 };

 char X86ExpandPseudo::ID = 0;
@@ -491,6 +503,115 @@
   llvm_unreachable("Previous switch has a fallthrough?");
 }

+// This function creates an additional block for storing the varargs guarded
+// registers. It adds a check of %al to the entry block, to skip
+// GuardedRegsBlk if the xmm registers should not be stored.
+//
+//   EntryBlk[VAStartPseudoInstr]          EntryBlk
+//        |                                    |     .
+//        |                                    |        .
+//        |                                    |     GuardedRegsBlk
+//        |                      =>            |        .
+//        |                                    |     .
+//        |                                 TailBlk
+//        |                                    |
+//        |                                    |
+//
+void X86ExpandPseudo::ExpandVastartSaveXmmRegs(
+    MachineBasicBlock *EntryBlk,
+    MachineBasicBlock::iterator VAStartPseudoInstr) const {
+  assert(VAStartPseudoInstr->getOpcode() == X86::VASTART_SAVE_XMM_REGS);
+
+  MachineFunction *Func = EntryBlk->getParent();
+  const TargetInstrInfo *TII = STI->getInstrInfo();
+  DebugLoc DL = VAStartPseudoInstr->getDebugLoc();
+  Register CountReg = VAStartPseudoInstr->getOperand(0).getReg();
+
+  // Calculate the liveins for the newly created blocks.
+  LivePhysRegs LiveRegs(*STI->getRegisterInfo());
+  SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;
+
+  LiveRegs.addLiveIns(*EntryBlk);
+  for (MachineInstr &MI : EntryBlk->instrs()) {
+    if (MI.getOpcode() == VAStartPseudoInstr->getOpcode())
+      break;
+
+    LiveRegs.stepForward(MI, Clobbers);
+  }
+
+  // Create the new basic blocks. One block contains all the XMM stores,
+  // and another block is the final destination regardless of whether any
+  // stores were performed.
+  const BasicBlock *LLVMBlk = EntryBlk->getBasicBlock();
+  MachineFunction::iterator EntryBlkIter = ++EntryBlk->getIterator();
+  MachineBasicBlock *GuardedRegsBlk = Func->CreateMachineBasicBlock(LLVMBlk);
+  MachineBasicBlock *TailBlk = Func->CreateMachineBasicBlock(LLVMBlk);
+  Func->insert(EntryBlkIter, GuardedRegsBlk);
+  Func->insert(EntryBlkIter, TailBlk);
+
+  // Transfer the remainder of EntryBlk and its successor edges to TailBlk.
+  TailBlk->splice(TailBlk->begin(), EntryBlk,
+                  std::next(MachineBasicBlock::iterator(VAStartPseudoInstr)),
+                  EntryBlk->end());
+  TailBlk->transferSuccessorsAndUpdatePHIs(EntryBlk);
+
+  int64_t FrameIndex = VAStartPseudoInstr->getOperand(1).getImm();
+  Register BaseReg;
+  uint64_t FrameOffset =
+      X86FL->getFrameIndexReference(*Func, FrameIndex, BaseReg).getFixed();
+  uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(2).getImm();
+
+  // TODO: add support for YMM and ZMM here.
+  unsigned MOVOpc = STI->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
+
+  // In the XMM save block, save all the XMM argument registers.
+  for (int64_t OpndIdx = 3, RegIdx = 0;
+       OpndIdx < VAStartPseudoInstr->getNumOperands() - 1;
+       OpndIdx++, RegIdx++) {
+
+    int64_t Offset = FrameOffset + VarArgsRegsOffset + RegIdx * 16;
+
+    MachineMemOperand *MMO = Func->getMachineMemOperand(
+        MachinePointerInfo::getFixedStack(*Func, FrameIndex, Offset),
+        MachineMemOperand::MOStore,
+        /*Size=*/16, Align(16));
+
+    BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc))
+        .addReg(BaseReg)
+        .addImm(/*Scale=*/1)
+        .addReg(/*IndexReg=*/0)
+        .addImm(/*Disp=*/Offset)
+        .addReg(/*Segment=*/0)
+        .addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg())
+        .addMemOperand(MMO);
+    assert(Register::isPhysicalRegister(
+        VAStartPseudoInstr->getOperand(OpndIdx).getReg()));
+  }
+
+  // The original block will now fall through to the GuardedRegsBlk.
+  EntryBlk->addSuccessor(GuardedRegsBlk);
+  // The GuardedRegsBlk will fall through to the TailBlk.
+  GuardedRegsBlk->addSuccessor(TailBlk);
+
+  if (!STI->isCallingConvWin64(Func->getFunction().getCallingConv())) {
+    // If %al is 0, branch around the XMM save block.
+    BuildMI(EntryBlk, DL, TII->get(X86::TEST8rr))
+        .addReg(CountReg)
+        .addReg(CountReg);
+    BuildMI(EntryBlk, DL, TII->get(X86::JCC_1))
+        .addMBB(TailBlk)
+        .addImm(X86::COND_E);
+    EntryBlk->addSuccessor(TailBlk);
+  }
+
+  // Add liveins to the created blocks.
+  addLiveIns(*GuardedRegsBlk, LiveRegs);
+  addLiveIns(*TailBlk, LiveRegs);
+
+  // Delete the pseudo.
+  VAStartPseudoInstr->eraseFromParent();
+}
+
 /// Expand all pseudo instructions contained in \p MBB.
 /// \returns true if any expansion occurred for \p MBB.
 bool X86ExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
@@ -507,6 +628,20 @@
   return Modified;
 }

+bool X86ExpandPseudo::ExpandPseudosWhichAffectControlFlow(MachineFunction &MF) {
+  // Currently the only pseudo which affects control flow is
+  // X86::VASTART_SAVE_XMM_REGS, which is located in the entry block.
+  // So we do not need to evaluate other blocks.
+  for (MachineInstr &Instr : MF.front().instrs()) {
+    if (Instr.getOpcode() == X86::VASTART_SAVE_XMM_REGS) {
+      ExpandVastartSaveXmmRegs(&(MF.front()), Instr);
+      return true;
+    }
+  }
+
+  return false;
+}
+
 bool X86ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
   STI = &static_cast<const X86Subtarget &>(MF.getSubtarget());
   TII = STI->getInstrInfo();
@@ -514,7 +649,8 @@
   X86FI = MF.getInfo<X86MachineFunctionInfo>();
   X86FL = STI->getFrameLowering();

-  bool Modified = false;
+  bool Modified = ExpandPseudosWhichAffectControlFlow(MF);
+
   for (MachineBasicBlock &MBB : MF)
     Modified |= ExpandMBB(MBB);
   return Modified;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1586,10 +1586,6 @@
     EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

     /// Utility function to emit the xmm reg save portion of va_start.
-    MachineBasicBlock *
-    EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
-                                             MachineBasicBlock *BB) const;
-
     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                  MachineInstr &MI2,
                                                  MachineBasicBlock *BB) const;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3499,9 +3499,12 @@
       Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
       ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
       for (MCPhysReg Reg : AvailableXmms) {
-        Register XMMReg = TheMachineFunction.addLiveIn(Reg, &X86::VR128RegClass);
-        LiveXMMRegs.push_back(
-            DAG.getCopyFromReg(Chain, DL, XMMReg, MVT::v4f32));
+        // FastRegisterAllocator spills virtual registers at basic
+        // block boundaries. That leads to uses of xmm registers
+        // outside of the check for %al. Pass physical registers to
+        // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
+ TheMachineFunction.getRegInfo().addLiveIn(Reg); + LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32)); } } @@ -32076,81 +32079,6 @@ return endMBB; } -MachineBasicBlock *X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( - MachineInstr &MI, MachineBasicBlock *MBB) const { - // Emit code to save XMM registers to the stack. The ABI says that the - // number of registers to save is given in %al, so it's theoretically - // possible to do an indirect jump trick to avoid saving all of them, - // however this code takes a simpler approach and just executes all - // of the stores if %al is non-zero. It's less code, and it's probably - // easier on the hardware branch predictor, and stores aren't all that - // expensive anyway. - - // Create the new basic blocks. One block contains all the XMM stores, - // and one block is the final destination regardless of whether any - // stores were performed. - const BasicBlock *LLVM_BB = MBB->getBasicBlock(); - MachineFunction *F = MBB->getParent(); - MachineFunction::iterator MBBIter = ++MBB->getIterator(); - MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(MBBIter, XMMSaveMBB); - F->insert(MBBIter, EndMBB); - - // Transfer the remainder of MBB and its successor edges to EndMBB. - EndMBB->splice(EndMBB->begin(), MBB, - std::next(MachineBasicBlock::iterator(MI)), MBB->end()); - EndMBB->transferSuccessorsAndUpdatePHIs(MBB); - - // The original block will now fall through to the XMM save block. - MBB->addSuccessor(XMMSaveMBB); - // The XMMSaveMBB will fall through to the end block. - XMMSaveMBB->addSuccessor(EndMBB); - - // Now add the instructions. 
- const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - const DebugLoc &DL = MI.getDebugLoc(); - - Register CountReg = MI.getOperand(0).getReg(); - int RegSaveFrameIndex = MI.getOperand(1).getImm(); - int64_t VarArgsFPOffset = MI.getOperand(2).getImm(); - - if (!Subtarget.isCallingConvWin64(F->getFunction().getCallingConv())) { - // If %al is 0, branch around the XMM save block. - BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg); - BuildMI(MBB, DL, TII->get(X86::JCC_1)).addMBB(EndMBB).addImm(X86::COND_E); - MBB->addSuccessor(EndMBB); - } - - // Make sure the last operand is EFLAGS, which gets clobbered by the branch - // that was just emitted, but clearly shouldn't be "saved". - assert((MI.getNumOperands() <= 3 || - !MI.getOperand(MI.getNumOperands() - 1).isReg() || - MI.getOperand(MI.getNumOperands() - 1).getReg() == X86::EFLAGS) && - "Expected last argument to be EFLAGS"); - unsigned MOVOpc = Subtarget.hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr; - // In the XMM save block, save all the XMM argument registers. - for (int i = 3, e = MI.getNumOperands() - 1; i != e; ++i) { - int64_t Offset = (i - 3) * 16 + VarArgsFPOffset; - MachineMemOperand *MMO = F->getMachineMemOperand( - MachinePointerInfo::getFixedStack(*F, RegSaveFrameIndex, Offset), - MachineMemOperand::MOStore, - /*Size=*/16, Align(16)); - BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc)) - .addFrameIndex(RegSaveFrameIndex) - .addImm(/*Scale=*/1) - .addReg(/*IndexReg=*/0) - .addImm(/*Disp=*/Offset) - .addReg(/*Segment=*/0) - .addReg(MI.getOperand(i).getReg()) - .addMemOperand(MMO); - } - - MI.eraseFromParent(); // The pseudo instruction is gone now. - - return EndMBB; -} - // The EFLAGS operand of SelectItr might be missing a kill marker // because there were multiple uses of EFLAGS, and ISel didn't know // which to mark. 
Figure out whether SelectItr should have had a @@ -33984,9 +33912,6 @@ case X86::XBEGIN: return emitXBegin(MI, BB, Subtarget.getInstrInfo()); - case X86::VASTART_SAVE_XMM_REGS: - return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB); - case X86::VAARG_64: case X86::VAARG_X32: return EmitVAARGWithCustomInserter(MI, BB); diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -69,7 +69,7 @@ let SchedRW = [WriteSystem] in { // x86-64 va_start lowering magic. -let usesCustomInserter = 1, Defs = [EFLAGS] in { +let hasSideEffects = 1, Defs = [EFLAGS] in { def VASTART_SAVE_XMM_REGS : I<0, Pseudo, (outs), (ins GR8:$al, @@ -80,7 +80,9 @@ timm:$regsavefi, timm:$offset), (implicit EFLAGS)]>; +} +let usesCustomInserter = 1, Defs = [EFLAGS] in { // The VAARG_64 and VAARG_X32 pseudo-instructions take the address of the // va_list, and place the address of the next argument into a register. 
let Defs = [EFLAGS] in { diff --git a/llvm/test/CodeGen/X86/musttail-varargs.ll b/llvm/test/CodeGen/X86/musttail-varargs.ll --- a/llvm/test/CodeGen/X86/musttail-varargs.ll +++ b/llvm/test/CodeGen/X86/musttail-varargs.ll @@ -37,13 +37,26 @@ ; LINUX-NEXT: .cfi_offset %r14, -32 ; LINUX-NEXT: .cfi_offset %r15, -24 ; LINUX-NEXT: .cfi_offset %rbp, -16 +; LINUX-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUX-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUX-NEXT: movaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUX-NEXT: movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUX-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUX-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUX-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUX-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUX-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; LINUX-NEXT: movq %r9, %r15 ; LINUX-NEXT: movq %r8, %r12 ; LINUX-NEXT: movq %rcx, %r13 ; LINUX-NEXT: movq %rdx, %rbp ; LINUX-NEXT: movq %rsi, %rbx ; LINUX-NEXT: movq %rdi, %r14 -; LINUX-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; LINUX-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; LINUX-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; LINUX-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; LINUX-NEXT: movq %r8, {{[0-9]+}}(%rsp) +; LINUX-NEXT: movq %r9, {{[0-9]+}}(%rsp) ; LINUX-NEXT: testb %al, %al ; LINUX-NEXT: je .LBB0_2 ; LINUX-NEXT: # %bb.1: @@ -56,27 +69,13 @@ ; LINUX-NEXT: movaps %xmm6, {{[0-9]+}}(%rsp) ; LINUX-NEXT: movaps %xmm7, {{[0-9]+}}(%rsp) ; LINUX-NEXT: .LBB0_2: -; LINUX-NEXT: movq %rbx, {{[0-9]+}}(%rsp) -; LINUX-NEXT: movq %rbp, {{[0-9]+}}(%rsp) -; LINUX-NEXT: movq %r13, {{[0-9]+}}(%rsp) -; LINUX-NEXT: movq %r12, {{[0-9]+}}(%rsp) -; LINUX-NEXT: movq %r15, {{[0-9]+}}(%rsp) ; LINUX-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; LINUX-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; LINUX-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; LINUX-NEXT: movq 
%rax, {{[0-9]+}}(%rsp) ; LINUX-NEXT: movabsq $206158430216, %rax # imm = 0x3000000008 ; LINUX-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; LINUX-NEXT: movq %r14, %rdi -; LINUX-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUX-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUX-NEXT: movaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUX-NEXT: movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUX-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUX-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUX-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUX-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUX-NEXT: callq get_f +; LINUX-NEXT: callq get_f@PLT ; LINUX-NEXT: movq %rax, %r11 ; LINUX-NEXT: movq %r14, %rdi ; LINUX-NEXT: movq %rbx, %rsi @@ -131,13 +130,26 @@ ; LINUX-X32-NEXT: .cfi_offset %r14, -32 ; LINUX-X32-NEXT: .cfi_offset %r15, -24 ; LINUX-X32-NEXT: .cfi_offset %rbp, -16 +; LINUX-X32-NEXT: movaps %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; LINUX-X32-NEXT: movaps %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; LINUX-X32-NEXT: movaps %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; LINUX-X32-NEXT: movaps %xmm4, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; LINUX-X32-NEXT: movaps %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; LINUX-X32-NEXT: movaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; LINUX-X32-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; LINUX-X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; LINUX-X32-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; LINUX-X32-NEXT: movq %r9, %r15 ; LINUX-X32-NEXT: movq %r8, %r12 ; LINUX-X32-NEXT: movq %rcx, %r13 ; LINUX-X32-NEXT: movq %rdx, %rbp ; LINUX-X32-NEXT: movq %rsi, %rbx ; LINUX-X32-NEXT: movq %rdi, %r14 -; LINUX-X32-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; LINUX-X32-NEXT: movq %rsi, {{[0-9]+}}(%esp) +; 
LINUX-X32-NEXT: movq %rdx, {{[0-9]+}}(%esp) +; LINUX-X32-NEXT: movq %rcx, {{[0-9]+}}(%esp) +; LINUX-X32-NEXT: movq %r8, {{[0-9]+}}(%esp) +; LINUX-X32-NEXT: movq %r9, {{[0-9]+}}(%esp) ; LINUX-X32-NEXT: testb %al, %al ; LINUX-X32-NEXT: je .LBB0_2 ; LINUX-X32-NEXT: # %bb.1: @@ -150,27 +162,13 @@ ; LINUX-X32-NEXT: movaps %xmm6, {{[0-9]+}}(%esp) ; LINUX-X32-NEXT: movaps %xmm7, {{[0-9]+}}(%esp) ; LINUX-X32-NEXT: .LBB0_2: -; LINUX-X32-NEXT: movq %rbx, {{[0-9]+}}(%esp) -; LINUX-X32-NEXT: movq %rbp, {{[0-9]+}}(%esp) -; LINUX-X32-NEXT: movq %r13, {{[0-9]+}}(%esp) -; LINUX-X32-NEXT: movq %r12, {{[0-9]+}}(%esp) -; LINUX-X32-NEXT: movq %r15, {{[0-9]+}}(%esp) ; LINUX-X32-NEXT: leal {{[0-9]+}}(%rsp), %eax ; LINUX-X32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; LINUX-X32-NEXT: leal {{[0-9]+}}(%rsp), %eax ; LINUX-X32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; LINUX-X32-NEXT: movabsq $206158430216, %rax # imm = 0x3000000008 ; LINUX-X32-NEXT: movq %rax, {{[0-9]+}}(%esp) -; LINUX-X32-NEXT: movq %r14, %rdi -; LINUX-X32-NEXT: movaps %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; LINUX-X32-NEXT: movaps %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; LINUX-X32-NEXT: movaps %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; LINUX-X32-NEXT: movaps %xmm4, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; LINUX-X32-NEXT: movaps %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; LINUX-X32-NEXT: movaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; LINUX-X32-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; LINUX-X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; LINUX-X32-NEXT: callq get_f +; LINUX-X32-NEXT: callq get_f@PLT ; LINUX-X32-NEXT: movl %eax, %r11d ; LINUX-X32-NEXT: movq %r14, %rdi ; LINUX-X32-NEXT: movq %rbx, %rsi diff --git a/llvm/test/CodeGen/X86/vastart-defs-eflags.ll b/llvm/test/CodeGen/X86/vastart-defs-eflags.ll --- a/llvm/test/CodeGen/X86/vastart-defs-eflags.ll +++ b/llvm/test/CodeGen/X86/vastart-defs-eflags.ll @@ -9,9 +9,14 @@ ; CHECK-LABEL: check_flag: ; 
CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: subq $56, %rsp +; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %r9, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je LBB0_2 -; CHECK-NEXT: ## %bb.1: ## %entry +; CHECK-NEXT: je LBB0_4 +; CHECK-NEXT: ## %bb.3: ## %entry ; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) @@ -20,16 +25,11 @@ ; CHECK-NEXT: movaps %xmm5, (%rsp) ; CHECK-NEXT: movaps %xmm6, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movaps %xmm7, {{[0-9]+}}(%rsp) -; CHECK-NEXT: LBB0_2: ## %entry -; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %r8, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %r9, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: LBB0_4: ## %entry ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testl $512, %edi ## imm = 0x200 -; CHECK-NEXT: je LBB0_4 -; CHECK-NEXT: ## %bb.3: ## %if.then +; CHECK-NEXT: je LBB0_2 +; CHECK-NEXT: ## %bb.1: ## %if.then ; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: movq %rax, 16 ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax @@ -37,7 +37,7 @@ ; CHECK-NEXT: movl $48, 4 ; CHECK-NEXT: movl $8, 0 ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: LBB0_4: ## %if.end +; CHECK-NEXT: LBB0_2: ## %if.end ; CHECK-NEXT: addq $56, %rsp ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/x32-va_start.ll b/llvm/test/CodeGen/X86/x32-va_start.ll --- a/llvm/test/CodeGen/X86/x32-va_start.ll +++ b/llvm/test/CodeGen/X86/x32-va_start.ll @@ -1,5 +1,8 @@ -; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -mattr=-sse | FileCheck %s -check-prefix=CHECK -check-prefix=NOSSE +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s -check-prefix=SSE +; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -mattr=-sse | FileCheck %s -check-prefix=NOSSE +; RUN: llc < %s -mtriple=i386-linux-gnux32 | FileCheck %s -check-prefix=32BITABI +; RUN: llc < %s -mtriple=i686-linux-gnux32 | FileCheck %s -check-prefix=32BITABI ; ; Verifies that x32 va_start lowering is sane. To regenerate this test, use ; cat <