diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -64,6 +64,9 @@
   bool ExpandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
   bool ExpandMBB(MachineBasicBlock &MBB);
+
+  void ExpandSaveVarargXmmRegs(MachineBasicBlock *MBB,
+                               MachineBasicBlock::iterator MBBI) const;
 };
 
 char X86ExpandPseudo::ID = 0;
@@ -445,10 +448,71 @@
   case TargetOpcode::ICALL_BRANCH_FUNNEL:
     ExpandICallBranchFunnel(&MBB, MBBI);
     return true;
+
+  case X86::SAVE_VARARG_XMM_REGS:
+    ExpandSaveVarargXmmRegs(&MBB, MBBI);
+    return true;
   }
   llvm_unreachable("Previous switch has a fallthrough?");
 }
 
+/// This function replaces the X86::SAVE_VARARG_XMM_REGS pseudo instruction
+/// with a sequence of stores of the specified xmm vararg registers.
+///
+/// Operand [0] of X86::SAVE_VARARG_XMM_REGS is the frame index of the stack
+/// area where the registers should be stored.
+/// Operand [1] of X86::SAVE_VARARG_XMM_REGS is the offset inside the stack
+/// frame to the area where the registers should be stored.
+/// Operands [2] and onward of X86::SAVE_VARARG_XMM_REGS are the xmm
+/// registers that should be stored.
+void X86ExpandPseudo::ExpandSaveVarargXmmRegs(
+    MachineBasicBlock *GuardedBlock,
+    MachineBasicBlock::iterator SaveVarargXmmRegsInstr) const {
+  assert(SaveVarargXmmRegsInstr->getOpcode() == X86::SAVE_VARARG_XMM_REGS);
+
+  MachineFunction *Func = GuardedBlock->getParent();
+  DebugLoc DL = SaveVarargXmmRegsInstr->getDebugLoc();
+
+  int64_t FrameIndex = SaveVarargXmmRegsInstr->getOperand(0).getImm();
+  Register BaseReg;
+  int64_t FrameOffset =
+      X86FL->getFrameIndexReference(*Func, FrameIndex, BaseReg);
+  int64_t VarArgsRegsOffset = SaveVarargXmmRegsInstr->getOperand(1).getImm();
+
+  // TODO: add support for YMM and ZMM here.
+  unsigned MOVOpc = STI->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
+
+  // In the XMM save block, save all the XMM argument registers.
+  for (int64_t OpndIdx = 2, RegIdx = 0;
+       OpndIdx < SaveVarargXmmRegsInstr->getNumOperands();
+       OpndIdx++, RegIdx++) {
+
+    int64_t Offset = FrameOffset + VarArgsRegsOffset + RegIdx * 16;
+
+    MachineMemOperand *MMO = Func->getMachineMemOperand(
+        MachinePointerInfo::getFixedStack(*Func, FrameIndex, Offset),
+        MachineMemOperand::MOStore,
+        /*Size=*/16, Align(16));
+
+    BuildMI(GuardedBlock, DL, TII->get(MOVOpc))
+        .addReg(BaseReg)
+        .addImm(/*Scale=*/1)
+        .addReg(/*IndexReg=*/0)
+        .addImm(/*Disp=*/Offset)
+        .addReg(/*Segment=*/0)
+        .addReg(SaveVarargXmmRegsInstr->getOperand(OpndIdx).getReg())
+        .addMemOperand(MMO);
+    assert(Register::isPhysicalRegister(
+        SaveVarargXmmRegsInstr->getOperand(OpndIdx).getReg()));
+
+    GuardedBlock->addLiveIn(
+        SaveVarargXmmRegsInstr->getOperand(OpndIdx).getReg());
+  }
+
+  // Delete the pseudo.
+  SaveVarargXmmRegsInstr->eraseFromParent();
+}
+
 /// Expand all pseudo instructions contained in \p MBB.
 /// \returns true if any expansion occurred for \p MBB.
 bool X86ExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1576,6 +1576,10 @@
     EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
                                              MachineBasicBlock *BB) const;
 
+    void AddSaveVarargXmmRegsPseudo(MachineBasicBlock *GuardedRegsBlk,
+                                    MachineBasicBlock *TailBlk,
+                                    MachineInstr &SrcPseudoInstr) const;
+
     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                  MachineInstr &MI2,
                                                  MachineBasicBlock *BB) const;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3432,9 +3432,12 @@
     unsigned AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
     ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
     for (MCPhysReg Reg : AvailableXmms) {
-      unsigned XMMReg = TheMachineFunction.addLiveIn(Reg, &X86::VR128RegClass);
-      LiveXMMRegs.push_back(
-          DAG.getCopyFromReg(Chain, DL, XMMReg, MVT::v4f32));
+      // The fast register allocator spills virtual registers at basic block
+      // boundaries, which would make the xmm registers live outside of the
+      // check for %al. Pass physical registers to VASTART_SAVE_XMM_REGS to
+      // avoid unnecessary spilling.
+      TheMachineFunction.getRegInfo().addLiveIn(Reg);
+      LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
     }
   }
 
@@ -31080,79 +31083,113 @@
   return endMBB;
 }
 
-MachineBasicBlock *X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
-    MachineInstr &MI, MachineBasicBlock *MBB) const {
-  // Emit code to save XMM registers to the stack. The ABI says that the
-  // number of registers to save is given in %al, so it's theoretically
-  // possible to do an indirect jump trick to avoid saving all of them,
-  // however this code takes a simpler approach and just executes all
-  // of the stores if %al is non-zero. It's less code, and it's probably
-  // easier on the hardware branch predictor, and stores aren't all that
-  // expensive anyway.
+// This function creates an additional block for storing the guarded vararg
+// xmm registers. It adds a check for %al to the entry block, to skip
+// GuardedRegsBlk when the xmm registers do not need to be stored.
+//
+//   EntryBlk[VAPseudoInstr]      EntryBlk
+//        |                           |     .
+//        |                           |        .
+//        |                           |   GuardedRegsBlk
+//        |                      =>   |        .
+//        |                           |     .
+//        |                     TailBlk[VAPseudoInstr]
+//        |                           |
+//        |                           |
+//
+static std::pair<MachineBasicBlock *, MachineBasicBlock *>
+createGuardedRegsBlock(MachineBasicBlock *EntryBlk, MachineInstr &VAPseudoInstr,
+                       const X86Subtarget &Subtarget) {
+  MachineFunction *Func = EntryBlk->getParent();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  DebugLoc DL = VAPseudoInstr.getDebugLoc();
+  Register CountReg = VAPseudoInstr.getOperand(0).getReg();
 
   // Create the new basic blocks. One block contains all the XMM stores,
   // and one block is the final destination regardless of whether any
   // stores were performed.
- const BasicBlock *LLVM_BB = MBB->getBasicBlock(); - MachineFunction *F = MBB->getParent(); - MachineFunction::iterator MBBIter = ++MBB->getIterator(); - MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(MBBIter, XMMSaveMBB); - F->insert(MBBIter, EndMBB); + const BasicBlock *LLVMBlk = EntryBlk->getBasicBlock(); + MachineFunction::iterator EntryBlkIter = ++EntryBlk->getIterator(); + MachineBasicBlock *GuardedRegsBlk = Func->CreateMachineBasicBlock(LLVMBlk); + MachineBasicBlock *TailBlk = Func->CreateMachineBasicBlock(LLVMBlk); + Func->insert(EntryBlkIter, GuardedRegsBlk); + Func->insert(EntryBlkIter, TailBlk); // Transfer the remainder of MBB and its successor edges to EndMBB. - EndMBB->splice(EndMBB->begin(), MBB, - std::next(MachineBasicBlock::iterator(MI)), MBB->end()); - EndMBB->transferSuccessorsAndUpdatePHIs(MBB); + TailBlk->splice(TailBlk->begin(), EntryBlk, + std::next(MachineBasicBlock::iterator(VAPseudoInstr)), + EntryBlk->end()); + TailBlk->transferSuccessorsAndUpdatePHIs(EntryBlk); // The original block will now fall through to the XMM save block. - MBB->addSuccessor(XMMSaveMBB); + EntryBlk->addSuccessor(GuardedRegsBlk); // The XMMSaveMBB will fall through to the end block. - XMMSaveMBB->addSuccessor(EndMBB); - - // Now add the instructions. - const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - DebugLoc DL = MI.getDebugLoc(); + GuardedRegsBlk->addSuccessor(TailBlk); - Register CountReg = MI.getOperand(0).getReg(); - int64_t RegSaveFrameIndex = MI.getOperand(1).getImm(); - int64_t VarArgsFPOffset = MI.getOperand(2).getImm(); - - if (!Subtarget.isCallingConvWin64(F->getFunction().getCallingConv())) { + if (!Subtarget.isCallingConvWin64(Func->getFunction().getCallingConv())) { // If %al is 0, branch around the XMM save block. - BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg); - BuildMI(MBB, DL, TII->get(X86::JCC_1)).addMBB(EndMBB).addImm(X86::COND_E); - MBB->addSuccessor(EndMBB); + BuildMI(EntryBlk, DL, TII->get(X86::TEST8rr)) + .addReg(CountReg) + .addReg(CountReg); + BuildMI(EntryBlk, DL, TII->get(X86::JCC_1)) + .addMBB(TailBlk) + .addImm(X86::COND_E); + EntryBlk->addSuccessor(TailBlk); } + return std::make_pair(GuardedRegsBlk, TailBlk); +} + +void X86TargetLowering::AddSaveVarargXmmRegsPseudo( + MachineBasicBlock *GuardedRegsBlk, MachineBasicBlock *TailBlk, + MachineInstr &SrcPseudoInstr) const { // Make sure the last operand is EFLAGS, which gets clobbered by the branch // that was just emitted, but clearly shouldn't be "saved". - assert((MI.getNumOperands() <= 3 || - !MI.getOperand(MI.getNumOperands() - 1).isReg() || - MI.getOperand(MI.getNumOperands() - 1).getReg() == X86::EFLAGS) && + assert((SrcPseudoInstr.getNumOperands() <= 3 || + !SrcPseudoInstr.getOperand(SrcPseudoInstr.getNumOperands() - 1) + .isReg() || + SrcPseudoInstr.getOperand(SrcPseudoInstr.getNumOperands() - 1) + .getReg() == X86::EFLAGS) && "Expected last argument to be EFLAGS"); - unsigned MOVOpc = Subtarget.hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr; - // In the XMM save block, save all the XMM argument registers. 
-  for (int i = 3, e = MI.getNumOperands() - 1; i != e; ++i) {
-    int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
-    MachineMemOperand *MMO = F->getMachineMemOperand(
-        MachinePointerInfo::getFixedStack(*F, RegSaveFrameIndex, Offset),
-        MachineMemOperand::MOStore,
-        /*Size=*/16, Align(16));
-    BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc))
-        .addFrameIndex(RegSaveFrameIndex)
-        .addImm(/*Scale=*/1)
-        .addReg(/*IndexReg=*/0)
-        .addImm(/*Disp=*/Offset)
-        .addReg(/*Segment=*/0)
-        .addReg(MI.getOperand(i).getReg())
-        .addMemOperand(MMO);
-  }
-  MI.eraseFromParent(); // The pseudo instruction is gone now.
+  // Create the SAVE_VARARG_XMM_REGS pseudo instruction.
+  MachineInstrBuilder MIB =
+      BuildMI(GuardedRegsBlk, SrcPseudoInstr.getDebugLoc(),
+              Subtarget.getInstrInfo()->get(X86::SAVE_VARARG_XMM_REGS));
+
+  // Set the frame index.
+  MIB.addImm(SrcPseudoInstr.getOperand(1).getImm());
+
+  // Set ArgsOffset.
+  MIB.addImm(SrcPseudoInstr.getOperand(2).getImm());
+
+  for (unsigned OpndIdx = 3, RegIdx = 0;
+       OpndIdx + 1 < SrcPseudoInstr.getNumOperands(); OpndIdx++, RegIdx++)
+    MIB.addReg(SrcPseudoInstr.getOperand(OpndIdx).getReg(),
+               RegState::InternalRead);
+
+  SrcPseudoInstr.eraseFromParent(); // The pseudo instruction is gone now.
+}
+
+MachineBasicBlock *X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
+    MachineInstr &PseudoVaStartInstr, MachineBasicBlock *EntryBlk) const {
+  // Emit code to save XMM registers to the stack. The ABI says that the
+  // number of registers to save is given in %al, so it's theoretically
+  // possible to do an indirect jump trick to avoid saving all of them,
+  // however this code takes a simpler approach and just executes all
+  // of the stores if %al is non-zero. It's less code, and it's probably
+  // easier on the hardware branch predictor, and stores aren't all that
+  // expensive anyway.
+
+  MachineBasicBlock *GuardedRegsBlk = nullptr;
+  MachineBasicBlock *TailBlk = nullptr;
+
+  std::tie(GuardedRegsBlk, TailBlk) =
+      createGuardedRegsBlock(EntryBlk, PseudoVaStartInstr, Subtarget);
+
+  AddSaveVarargXmmRegsPseudo(GuardedRegsBlk, TailBlk, PseudoVaStartInstr);
 
-  return EndMBB;
+  return TailBlk;
 }
 
 // The EFLAGS operand of SelectItr might be missing a kill marker
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -68,6 +68,15 @@
 
 let SchedRW = [WriteSystem] in {
 
+let hasSideEffects = 1 in {
+def SAVE_VARARG_XMM_REGS : I<0, Pseudo,
+                             (outs),
+                             (ins i64imm:$regsavefi, i64imm:$offset,
+                              variable_ops),
+                             "#SAVE_VARARG_XMM_REGS $regsavefi, $offset",
+                             []>;
+}
+
 // x86-64 va_start lowering magic.
let usesCustomInserter = 1, Defs = [EFLAGS] in { def VASTART_SAVE_XMM_REGS : I<0, Pseudo, diff --git a/llvm/test/CodeGen/X86/musttail-varargs.ll b/llvm/test/CodeGen/X86/musttail-varargs.ll --- a/llvm/test/CodeGen/X86/musttail-varargs.ll +++ b/llvm/test/CodeGen/X86/musttail-varargs.ll @@ -37,12 +37,25 @@ ; LINUX-NEXT: .cfi_offset %r14, -32 ; LINUX-NEXT: .cfi_offset %r15, -24 ; LINUX-NEXT: .cfi_offset %rbp, -16 +; LINUX-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUX-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUX-NEXT: movaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUX-NEXT: movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUX-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUX-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUX-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUX-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; LINUX-NEXT: movq %r9, %r15 ; LINUX-NEXT: movq %r8, %r12 ; LINUX-NEXT: movq %rcx, %r13 ; LINUX-NEXT: movq %rdx, %rbp ; LINUX-NEXT: movq %rsi, %rbx ; LINUX-NEXT: movq %rdi, %r14 +; LINUX-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; LINUX-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; LINUX-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; LINUX-NEXT: movq %r8, {{[0-9]+}}(%rsp) +; LINUX-NEXT: movq %r9, {{[0-9]+}}(%rsp) ; LINUX-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; LINUX-NEXT: testb %al, %al ; LINUX-NEXT: je .LBB0_2 @@ -56,11 +69,6 @@ ; LINUX-NEXT: movaps %xmm6, {{[0-9]+}}(%rsp) ; LINUX-NEXT: movaps %xmm7, {{[0-9]+}}(%rsp) ; LINUX-NEXT: .LBB0_2: -; LINUX-NEXT: movq %rbx, {{[0-9]+}}(%rsp) -; LINUX-NEXT: movq %rbp, {{[0-9]+}}(%rsp) -; LINUX-NEXT: movq %r13, {{[0-9]+}}(%rsp) -; LINUX-NEXT: movq %r12, {{[0-9]+}}(%rsp) -; LINUX-NEXT: movq %r15, {{[0-9]+}}(%rsp) ; LINUX-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; LINUX-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; LINUX-NEXT: leaq {{[0-9]+}}(%rsp), %rax @@ -68,14 +76,6 @@ ; LINUX-NEXT: movabsq $206158430216, %rax # imm = 0x3000000008 ; LINUX-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; LINUX-NEXT: movq %r14, %rdi -; LINUX-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUX-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUX-NEXT: movaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUX-NEXT: movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUX-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUX-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUX-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; LINUX-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; LINUX-NEXT: callq get_f ; LINUX-NEXT: movq %rax, %r11 ; LINUX-NEXT: movq %r14, %rdi @@ -131,12 +131,25 @@ ; LINUX-X32-NEXT: .cfi_offset %r14, -32 ; LINUX-X32-NEXT: .cfi_offset %r15, -24 ; LINUX-X32-NEXT: .cfi_offset %rbp, -16 +; LINUX-X32-NEXT: movaps %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; LINUX-X32-NEXT: movaps %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; LINUX-X32-NEXT: movaps %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; LINUX-X32-NEXT: movaps %xmm4, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; LINUX-X32-NEXT: movaps %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; LINUX-X32-NEXT: movaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; LINUX-X32-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; LINUX-X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; LINUX-X32-NEXT: movq %r9, %r15 ; LINUX-X32-NEXT: movq %r8, %r12 
; LINUX-X32-NEXT: movq %rcx, %r13 ; LINUX-X32-NEXT: movq %rdx, %rbp ; LINUX-X32-NEXT: movq %rsi, %rbx ; LINUX-X32-NEXT: movl %edi, %r14d +; LINUX-X32-NEXT: movq %rsi, {{[0-9]+}}(%esp) +; LINUX-X32-NEXT: movq %rdx, {{[0-9]+}}(%esp) +; LINUX-X32-NEXT: movq %rcx, {{[0-9]+}}(%esp) +; LINUX-X32-NEXT: movq %r8, {{[0-9]+}}(%esp) +; LINUX-X32-NEXT: movq %r9, {{[0-9]+}}(%esp) ; LINUX-X32-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; LINUX-X32-NEXT: testb %al, %al ; LINUX-X32-NEXT: je .LBB0_2 @@ -150,11 +163,6 @@ ; LINUX-X32-NEXT: movaps %xmm6, {{[0-9]+}}(%esp) ; LINUX-X32-NEXT: movaps %xmm7, {{[0-9]+}}(%esp) ; LINUX-X32-NEXT: .LBB0_2: -; LINUX-X32-NEXT: movq %rbx, {{[0-9]+}}(%esp) -; LINUX-X32-NEXT: movq %rbp, {{[0-9]+}}(%esp) -; LINUX-X32-NEXT: movq %r13, {{[0-9]+}}(%esp) -; LINUX-X32-NEXT: movq %r12, {{[0-9]+}}(%esp) -; LINUX-X32-NEXT: movq %r15, {{[0-9]+}}(%esp) ; LINUX-X32-NEXT: leal {{[0-9]+}}(%rsp), %eax ; LINUX-X32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; LINUX-X32-NEXT: leal {{[0-9]+}}(%rsp), %eax @@ -162,14 +170,6 @@ ; LINUX-X32-NEXT: movabsq $206158430216, %rax # imm = 0x3000000008 ; LINUX-X32-NEXT: movq %rax, {{[0-9]+}}(%esp) ; LINUX-X32-NEXT: movl %r14d, %edi -; LINUX-X32-NEXT: movaps %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; LINUX-X32-NEXT: movaps %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; LINUX-X32-NEXT: movaps %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; LINUX-X32-NEXT: movaps %xmm4, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; LINUX-X32-NEXT: movaps %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; LINUX-X32-NEXT: movaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; LINUX-X32-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; LINUX-X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; LINUX-X32-NEXT: callq get_f ; LINUX-X32-NEXT: movl %eax, %r11d ; LINUX-X32-NEXT: movl %r14d, %edi diff --git a/llvm/test/CodeGen/X86/vastart-defs-eflags.ll b/llvm/test/CodeGen/X86/vastart-defs-eflags.ll --- a/llvm/test/CodeGen/X86/vastart-defs-eflags.ll +++ b/llvm/test/CodeGen/X86/vastart-defs-eflags.ll @@ -9,6 +9,11 @@ ; CHECK-LABEL: check_flag: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: subq $56, %rsp +; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %r9, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je LBB0_2 ; CHECK-NEXT: ## %bb.1: ## %entry @@ -21,11 +26,6 @@ ; CHECK-NEXT: movaps %xmm6, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movaps %xmm7, {{[0-9]+}}(%rsp) ; CHECK-NEXT: LBB0_2: ## %entry -; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %r8, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %r9, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testl $512, %edi ## imm = 0x200 ; CHECK-NEXT: je LBB0_4 diff --git a/llvm/test/CodeGen/X86/x32-va_start.ll b/llvm/test/CodeGen/X86/x32-va_start.ll --- a/llvm/test/CodeGen/X86/x32-va_start.ll +++ b/llvm/test/CodeGen/X86/x32-va_start.ll @@ -27,6 +27,11 @@ call void @llvm.lifetime.start.p0i8(i64 16, i8* %0) #2 call void @llvm.va_start(i8* %0) ; SSE: subl $72, %esp +; CHECK-DAG: movq %r9 +; CHECK-DAG: movq %r8 +; CHECK-DAG: movq %rcx +; CHECK-DAG: movq %rdx +; CHECK-DAG: movq %rsi ; SSE: testb %al, %al ; SSE: je .[[NOFP:.*]] ; SSE-DAG: movaps %xmm1 @@ -38,11 +43,6 @@ ; SSE-DAG: movaps %xmm7 ; NOSSE-NOT: xmm ; SSE: .[[NOFP]]: -; CHECK-DAG: movq %r9 -; 
CHECK-DAG: movq %r8 -; CHECK-DAG: movq %rcx -; CHECK-DAG: movq %rdx -; CHECK-DAG: movq %rsi %gp_offset_p = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0, i32 0 %gp_offset = load i32, i32* %gp_offset_p, align 16 %fits_in_gp = icmp ult i32 %gp_offset, 41 diff --git a/llvm/test/CodeGen/X86/xmm-vararg-noopt.ll b/llvm/test/CodeGen/X86/xmm-vararg-noopt.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/xmm-vararg-noopt.ll @@ -0,0 +1,49 @@ +; RUN: llc -O0 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s + +; CHECK-LABEL: testvarargs +; Ensure that xmm registers are not used before testing %al +; CHECK-NOT: xmm +; CHECK: testb %al, %al +; CHECK-NOT: xmm +; CHECK: # %bb.1 +; CHECK-NEXT: %xmm0, {{.*}}%rsp +; CHECK-NEXT: %xmm1, {{.*}}%rsp +; CHECK-NEXT: %xmm2, {{.*}}%rsp +; CHECK-NEXT: %xmm3, {{.*}}%rsp +; CHECK-NEXT: %xmm4, {{.*}}%rsp +; CHECK-NEXT: %xmm5, {{.*}}%rsp +; CHECK-NEXT: %xmm6, {{.*}}%rsp +; CHECK-NEXT: %xmm7, {{.*}}%rsp + +; ModuleID = 'variadic.c' +source_filename = "variadic.c" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux" + +%struct.__va_list_tag = type { i32, i32, i8*, i8* } + +@.str = private unnamed_addr constant [9 x i8] c"\0A hello \00", align 1 + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local void @testvarargs(i8* %fmt, ...) { +entry: + %fmt.addr = alloca i8*, align 8 + %va = alloca [1 x %struct.__va_list_tag], align 16 + store i8* %fmt, i8** %fmt.addr, align 8 + %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %va, i64 0, i64 0 + %arraydecay1 = bitcast %struct.__va_list_tag* %arraydecay to i8* + call void @llvm.va_start(i8* %arraydecay1) + %arraydecay2 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %va, i64 0, i64 0 + %arraydecay23 = bitcast %struct.__va_list_tag* %arraydecay2 to i8* + call void @llvm.va_end(i8* %arraydecay23) + %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0)) + ret void +} + +; Function Attrs: nounwind +declare void @llvm.va_start(i8*) + +; Function Attrs: nounwind +declare void @llvm.va_end(i8*) + +declare dso_local i32 @printf(i8*, ...)