diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -2018,7 +2018,8 @@ } virtual const TargetRegisterClass * - getVectorRegisterClassForSpill2Reg(const TargetRegisterInfo *TRI) const { + getVectorRegisterClassForSpill2Reg(const TargetRegisterInfo *TRI, + const TargetSubtargetInfo *STI) const { llvm_unreachable( "Target didn't implement " "TargetInstrInfo::createVirtualVectorRegisterForSpillToReg()"); @@ -2033,20 +2034,18 @@ } /// Inserts \p SrcReg into the first lane of \p DstReg. - virtual MachineInstr * - spill2RegInsertToVectorReg(Register DstReg, Register SrcReg, - int OperationBits, MachineBasicBlock *MBB, - MachineBasicBlock::iterator InsertBeforeIt, - const TargetRegisterInfo *TRI) const { + virtual MachineInstr *spill2RegInsertToVectorReg( + Register DstReg, Register SrcReg, int OperationBits, + MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertBeforeIt, + const TargetRegisterInfo *TRI, const TargetSubtargetInfo *STI) const { llvm_unreachable("Target does not implement this"); } /// Extracts the first lane of \p SrcReg into \p DstReg. - virtual MachineInstr * - spill2RegExtractFromVectorReg(Register DstReg, Register SrcReg, - int OperationBits, MachineBasicBlock *InsertMBB, - MachineBasicBlock::iterator InsertBeforeIt, - const TargetRegisterInfo *TRI) const { + virtual MachineInstr *spill2RegExtractFromVectorReg( + Register DstReg, Register SrcReg, int OperationBits, + MachineBasicBlock *InsertMBB, MachineBasicBlock::iterator InsertBeforeIt, + const TargetRegisterInfo *TRI, const TargetSubtargetInfo *STI) const { llvm_unreachable("Target does not implement this"); } diff --git a/llvm/lib/CodeGen/Spill2Reg.cpp b/llvm/lib/CodeGen/Spill2Reg.cpp --- a/llvm/lib/CodeGen/Spill2Reg.cpp +++ b/llvm/lib/CodeGen/Spill2Reg.cpp @@ -342,7 +342,7 @@ MachineInstr *SpillToVector = TII->spill2RegInsertToVectorReg( VectorReg, OldReg, SpillData.MemBits, StackSpill->getParent(), - /*InsertBeforeIt=*/StackSpill->getIterator(), TRI); + /*InsertBeforeIt=*/StackSpill->getIterator(), TRI, &MF->getSubtarget()); // Mark VectorReg as live in the instr's BB. LRUs[StackSpill->getParent()].addReg(VectorReg); @@ -359,7 +359,8 @@ MachineInstr *ReloadFromReg = TII->spill2RegExtractFromVectorReg( OldReg, VectorReg, ReloadData.MemBits, StackReload->getParent(), - /*InsertBeforeIt=*/StackReload->getIterator(), TRI); + /*InsertBeforeIt=*/StackReload->getIterator(), TRI, + &MF->getSubtarget()); // Mark VectorReg as live in the instr's BB. LRUs[StackReload->getParent()].addReg(VectorReg); @@ -471,7 +472,7 @@ // Look for a physical register that in LRU. 
llvm::Optional PhysVectorRegOpt = tryGetFreePhysicalReg( - TII->getVectorRegisterClassForSpill2Reg(TRI), LRU); + TII->getVectorRegisterClassForSpill2Reg(TRI, &MF->getSubtarget()), LRU); if (!PhysVectorRegOpt) continue; diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -667,7 +667,8 @@ bool targetSupportsSpill2Reg(const TargetSubtargetInfo *STI) const override; const TargetRegisterClass *getVectorRegisterClassForSpill2Reg( - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + const TargetSubtargetInfo *STI) const override; bool isSpill2RegProfitable(const MachineInstr *MI, const TargetRegisterInfo *TRI, @@ -681,13 +682,15 @@ spill2RegInsertToVectorReg(Register DstReg, Register SrcReg, int OperationBits, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertBeforeIt, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + const TargetSubtargetInfo *STI) const override; MachineInstr * spill2RegExtractFromVectorReg(Register DstReg, Register SrcReg, int OperationBits, MachineBasicBlock *InsertMBB, MachineBasicBlock::iterator InsertBeforeIt, - const TargetRegisterInfo *TRI) const override; + const TargetRegisterInfo *TRI, + const TargetSubtargetInfo *STI) const override; }; } // namespace llvm diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -89,6 +89,10 @@ cl::desc("When checking for profitability, explore up to this many nearby " "instructions.")); +static cl::opt<bool> Spill2RegNoAVX( + "spill2reg-no-avx", cl::Hidden, cl::init(false), + cl::desc("Don't use AVX instructions even if the target supports them.")); + // Pin the vtable to this file. void X86InstrInfo::anchor() {} @@ -9530,10 +9534,16 @@ return X86STI->hasSSE41(); } +static inline bool useAVX(const TargetSubtargetInfo *STI) { + const X86Subtarget *X86STI = static_cast<const X86Subtarget *>(STI); + bool UseAVX = X86STI->hasAVX() && !Spill2RegNoAVX; + return UseAVX; +} + const TargetRegisterClass *X86InstrInfo::getVectorRegisterClassForSpill2Reg( - const TargetRegisterInfo *TRI) const { - const TargetRegisterClass *VecRegClass = - TRI->getRegClass(X86::VR128RegClassID); + const TargetRegisterInfo *TRI, const TargetSubtargetInfo *STI) const { + const TargetRegisterClass *VecRegClass = TRI->getRegClass( + useAVX(STI) ? X86::VR128XRegClassID : X86::VR128RegClassID); return VecRegClass; } @@ -9579,14 +9589,22 @@ return MemHeuristic && VecHeuristic; } -static unsigned getInsertOrExtractOpcode(unsigned Bits, bool Insert) { +static unsigned getInsertOrExtractOpcode(unsigned Bits, bool Insert, + const TargetSubtargetInfo *STI) { + bool UseAVX = useAVX(STI); switch (Bits) { case 8: case 16: case 32: - return Insert ? X86::MOVDI2PDIrr : X86::MOVPDI2DIrr; + if (UseAVX) + return Insert ? X86::VMOVDI2PDIZrr : X86::VMOVPDI2DIZrr; + else + return Insert ? X86::MOVDI2PDIrr : X86::MOVPDI2DIrr; case 64: - return Insert ? X86::MOV64toPQIrr : X86::MOVPQIto64rr; + if (UseAVX) + return Insert ? X86::VMOV64toPQIZrr : X86::VMOVPQIto64Zrr; + else + return Insert ?
X86::MOV64toPQIrr : X86::MOVPQIto64rr; default: llvm_unreachable("Unsupported bits"); } @@ -9624,11 +9642,11 @@ MachineInstr *X86InstrInfo::spill2RegInsertToVectorReg( Register DstReg, Register SrcReg, int OperationBits, MachineBasicBlock *MBB, - MachineBasicBlock::iterator InsertBeforeIt, - const TargetRegisterInfo *TRI) const { + MachineBasicBlock::iterator InsertBeforeIt, const TargetRegisterInfo *TRI, + const TargetSubtargetInfo *STI) const { DebugLoc DL; unsigned InsertOpcode = - getInsertOrExtractOpcode(OperationBits, true /*insert*/); + getInsertOrExtractOpcode(OperationBits, true /*insert*/, STI); const MCInstrDesc &InsertMCID = get(InsertOpcode); // `movd` does not support 8/16 bit operands. Instead, we use a 32-bit // register. For example: @@ -9645,10 +9663,10 @@ MachineInstr *X86InstrInfo::spill2RegExtractFromVectorReg( Register DstReg, Register SrcReg, int OperationBits, MachineBasicBlock *InsertMBB, MachineBasicBlock::iterator InsertBeforeIt, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, const TargetSubtargetInfo *STI) const { DebugLoc DL; unsigned ExtractOpcode = - getInsertOrExtractOpcode(OperationBits, false /*extract*/); + getInsertOrExtractOpcode(OperationBits, false /*extract*/, STI); const MCInstrDesc &ExtractMCID = get(ExtractOpcode); // `movd` does not support 8/16 bit operands. Instead, we use a 32-bit // register. For example: diff --git a/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll b/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll --- a/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll +++ b/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 | FileCheck %s +; RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+avx | FileCheck --check-prefix=AVX %s ; End-to-end check that Spill2Reg works with 16-bit registers. 
@@ -130,6 +131,90 @@ ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq +; +; AVX-LABEL: _Z5spillv: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rbp +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: pushq %r15 +; AVX-NEXT: .cfi_def_cfa_offset 24 +; AVX-NEXT: pushq %r14 +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: pushq %r13 +; AVX-NEXT: .cfi_def_cfa_offset 40 +; AVX-NEXT: pushq %r12 +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: pushq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 56 +; AVX-NEXT: .cfi_offset %rbx, -56 +; AVX-NEXT: .cfi_offset %r12, -48 +; AVX-NEXT: .cfi_offset %r13, -40 +; AVX-NEXT: .cfi_offset %r14, -32 +; AVX-NEXT: .cfi_offset %r15, -24 +; AVX-NEXT: .cfi_offset %rbp, -16 +; AVX-NEXT: movw D0(%rip), %ax +; AVX-NEXT: vmovd %eax, %xmm3 +; AVX-NEXT: movzwl D1(%rip), %ecx +; AVX-NEXT: movzwl D2(%rip), %edx +; AVX-NEXT: movzwl D3(%rip), %esi +; AVX-NEXT: movzwl D4(%rip), %edi +; AVX-NEXT: movzwl D5(%rip), %r8d +; AVX-NEXT: movzwl D6(%rip), %r9d +; AVX-NEXT: movzwl D7(%rip), %r10d +; AVX-NEXT: movzwl D8(%rip), %r11d +; AVX-NEXT: movzwl D9(%rip), %ebx +; AVX-NEXT: movzwl D10(%rip), %ebp +; AVX-NEXT: movzwl D11(%rip), %r14d +; AVX-NEXT: movzwl D12(%rip), %r15d +; AVX-NEXT: movzwl D13(%rip), %r12d +; AVX-NEXT: movzwl D14(%rip), %r13d +; AVX-NEXT: movw D15(%rip), %ax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: movw D16(%rip), %ax +; AVX-NEXT: vmovd %eax, %xmm1 +; AVX-NEXT: movw D17(%rip), %ax +; AVX-NEXT: vmovd %eax, %xmm4 +; AVX-NEXT: movw D18(%rip), %ax +; AVX-NEXT: vmovd %eax, %xmm2 +; AVX-NEXT: #APP +; AVX-NEXT: #NO_APP +; AVX-NEXT: vmovd %xmm3, %eax +; AVX-NEXT: movw %ax, U0(%rip) +; AVX-NEXT: movw %cx, U1(%rip) +; AVX-NEXT: movw %dx, U2(%rip) +; AVX-NEXT: movw %si, U3(%rip) +; AVX-NEXT: movw %di, U4(%rip) +; AVX-NEXT: movw %r8w, U5(%rip) +; AVX-NEXT: movw %r9w, U6(%rip) +; AVX-NEXT: movw %r10w, U7(%rip) +; AVX-NEXT: movw %r11w, U8(%rip) +; AVX-NEXT: movw %bx, U9(%rip) +; AVX-NEXT: movw %bp, U10(%rip) +; AVX-NEXT: movw %r14w, U11(%rip) +; AVX-NEXT: movw %r15w, U12(%rip) +; AVX-NEXT: movw %r12w, U13(%rip) +; AVX-NEXT: movw %r13w, U14(%rip) +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: movw %ax, U15(%rip) +; AVX-NEXT: vmovd %xmm1, %eax +; AVX-NEXT: movw %ax, U16(%rip) +; AVX-NEXT: vmovd %xmm4, %eax +; AVX-NEXT: movw %ax, U17(%rip) +; AVX-NEXT: vmovd %xmm2, %eax +; AVX-NEXT: movw %ax, U18(%rip) +; AVX-NEXT: popq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: popq %r12 +; AVX-NEXT: .cfi_def_cfa_offset 40 +; AVX-NEXT: popq %r13 +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: popq %r14 +; AVX-NEXT: .cfi_def_cfa_offset 24 +; AVX-NEXT: popq %r15 +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: popq %rbp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %0 = load i16, i16* @D0 %1 = load i16, i16* @D1 diff --git a/llvm/test/CodeGen/X86/spill2reg_end_to_end_32bit.ll b/llvm/test/CodeGen/X86/spill2reg_end_to_end_32bit.ll --- a/llvm/test/CodeGen/X86/spill2reg_end_to_end_32bit.ll +++ b/llvm/test/CodeGen/X86/spill2reg_end_to_end_32bit.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 | FileCheck %s +; RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 | FileCheck --check-prefix=AVX %s ; End-to-end check that Spill2Reg works with 32-bit registers. 
@@ -130,6 +131,90 @@ ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq +; +; AVX-LABEL: _Z5spillv: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rbp +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: pushq %r15 +; AVX-NEXT: .cfi_def_cfa_offset 24 +; AVX-NEXT: pushq %r14 +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: pushq %r13 +; AVX-NEXT: .cfi_def_cfa_offset 40 +; AVX-NEXT: pushq %r12 +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: pushq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 56 +; AVX-NEXT: .cfi_offset %rbx, -56 +; AVX-NEXT: .cfi_offset %r12, -48 +; AVX-NEXT: .cfi_offset %r13, -40 +; AVX-NEXT: .cfi_offset %r14, -32 +; AVX-NEXT: .cfi_offset %r15, -24 +; AVX-NEXT: .cfi_offset %rbp, -16 +; AVX-NEXT: movl D0(%rip), %eax +; AVX-NEXT: movd %eax, %xmm3 +; AVX-NEXT: movl D1(%rip), %ecx +; AVX-NEXT: movl D2(%rip), %edx +; AVX-NEXT: movl D3(%rip), %esi +; AVX-NEXT: movl D4(%rip), %edi +; AVX-NEXT: movl D5(%rip), %r8d +; AVX-NEXT: movl D6(%rip), %r9d +; AVX-NEXT: movl D7(%rip), %r10d +; AVX-NEXT: movl D8(%rip), %r11d +; AVX-NEXT: movl D9(%rip), %ebx +; AVX-NEXT: movl D10(%rip), %ebp +; AVX-NEXT: movl D11(%rip), %r14d +; AVX-NEXT: movl D12(%rip), %r15d +; AVX-NEXT: movl D13(%rip), %r12d +; AVX-NEXT: movl D14(%rip), %r13d +; AVX-NEXT: movl D15(%rip), %eax +; AVX-NEXT: movd %eax, %xmm0 +; AVX-NEXT: movl D16(%rip), %eax +; AVX-NEXT: movd %eax, %xmm1 +; AVX-NEXT: movl D17(%rip), %eax +; AVX-NEXT: movd %eax, %xmm4 +; AVX-NEXT: movl D18(%rip), %eax +; AVX-NEXT: movd %eax, %xmm2 +; AVX-NEXT: #APP +; AVX-NEXT: #NO_APP +; AVX-NEXT: movd %xmm3, %eax +; AVX-NEXT: movl %eax, U0(%rip) +; AVX-NEXT: movl %ecx, U1(%rip) +; AVX-NEXT: movl %edx, U2(%rip) +; AVX-NEXT: movl %esi, U3(%rip) +; AVX-NEXT: movl %edi, U4(%rip) +; AVX-NEXT: movl %r8d, U5(%rip) +; AVX-NEXT: movl %r9d, U6(%rip) +; AVX-NEXT: movl %r10d, U7(%rip) +; AVX-NEXT: movl %r11d, U8(%rip) +; AVX-NEXT: movl %ebx, U9(%rip) +; AVX-NEXT: movl %ebp, U10(%rip) +; AVX-NEXT: movl %r14d, U11(%rip) +; AVX-NEXT: movl %r15d, U12(%rip) +; AVX-NEXT: movl %r12d, U13(%rip) +; AVX-NEXT: movl %r13d, U14(%rip) +; AVX-NEXT: movd %xmm0, %eax +; AVX-NEXT: movl %eax, U15(%rip) +; AVX-NEXT: movd %xmm1, %eax +; AVX-NEXT: movl %eax, U16(%rip) +; AVX-NEXT: movd %xmm4, %eax +; AVX-NEXT: movl %eax, U17(%rip) +; AVX-NEXT: movd %xmm2, %eax +; AVX-NEXT: movl %eax, U18(%rip) +; AVX-NEXT: popq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: popq %r12 +; AVX-NEXT: .cfi_def_cfa_offset 40 +; AVX-NEXT: popq %r13 +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: popq %r14 +; AVX-NEXT: .cfi_def_cfa_offset 24 +; AVX-NEXT: popq %r15 +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: popq %rbp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %0 = load i32, i32* @D0 %1 = load i32, i32* @D1 diff --git a/llvm/test/CodeGen/X86/spill2reg_end_to_end_64bit.ll b/llvm/test/CodeGen/X86/spill2reg_end_to_end_64bit.ll --- a/llvm/test/CodeGen/X86/spill2reg_end_to_end_64bit.ll +++ b/llvm/test/CodeGen/X86/spill2reg_end_to_end_64bit.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 | FileCheck %s +; RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 | FileCheck --check-prefix=AVX %s ; End-to-end check that Spill2Reg works with 64-bit registers. 
@@ -130,6 +131,90 @@ ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq +; +; AVX-LABEL: _Z5spillv: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rbp +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: pushq %r15 +; AVX-NEXT: .cfi_def_cfa_offset 24 +; AVX-NEXT: pushq %r14 +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: pushq %r13 +; AVX-NEXT: .cfi_def_cfa_offset 40 +; AVX-NEXT: pushq %r12 +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: pushq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 56 +; AVX-NEXT: .cfi_offset %rbx, -56 +; AVX-NEXT: .cfi_offset %r12, -48 +; AVX-NEXT: .cfi_offset %r13, -40 +; AVX-NEXT: .cfi_offset %r14, -32 +; AVX-NEXT: .cfi_offset %r15, -24 +; AVX-NEXT: .cfi_offset %rbp, -16 +; AVX-NEXT: movq D0(%rip), %rax +; AVX-NEXT: movq %rax, %xmm3 +; AVX-NEXT: movq D1(%rip), %rcx +; AVX-NEXT: movq D2(%rip), %rdx +; AVX-NEXT: movq D3(%rip), %rsi +; AVX-NEXT: movq D4(%rip), %rdi +; AVX-NEXT: movq D5(%rip), %r8 +; AVX-NEXT: movq D6(%rip), %r9 +; AVX-NEXT: movq D7(%rip), %r10 +; AVX-NEXT: movq D8(%rip), %r11 +; AVX-NEXT: movq D9(%rip), %rbx +; AVX-NEXT: movq D10(%rip), %r14 +; AVX-NEXT: movq D11(%rip), %r15 +; AVX-NEXT: movq D12(%rip), %r12 +; AVX-NEXT: movq D13(%rip), %r13 +; AVX-NEXT: movq D14(%rip), %rbp +; AVX-NEXT: movq D15(%rip), %rax +; AVX-NEXT: movq %rax, %xmm0 +; AVX-NEXT: movq D16(%rip), %rax +; AVX-NEXT: movq %rax, %xmm1 +; AVX-NEXT: movq D17(%rip), %rax +; AVX-NEXT: movq %rax, %xmm4 +; AVX-NEXT: movq D18(%rip), %rax +; AVX-NEXT: movq %rax, %xmm2 +; AVX-NEXT: #APP +; AVX-NEXT: #NO_APP +; AVX-NEXT: movq %xmm3, %rax +; AVX-NEXT: movq %rax, U0(%rip) +; AVX-NEXT: movq %rcx, U1(%rip) +; AVX-NEXT: movq %rdx, U2(%rip) +; AVX-NEXT: movq %rsi, U3(%rip) +; AVX-NEXT: movq %rdi, U4(%rip) +; AVX-NEXT: movq %r8, U5(%rip) +; AVX-NEXT: movq %r9, U6(%rip) +; AVX-NEXT: movq %r10, U7(%rip) +; AVX-NEXT: movq %r11, U8(%rip) +; AVX-NEXT: movq %rbx, U9(%rip) +; AVX-NEXT: movq %r14, U10(%rip) +; AVX-NEXT: movq %r15, U11(%rip) +; AVX-NEXT: movq %r12, U12(%rip) +; AVX-NEXT: movq %r13, U13(%rip) +; AVX-NEXT: movq %rbp, U14(%rip) +; AVX-NEXT: movq %xmm0, %rax +; AVX-NEXT: movq %rax, U15(%rip) +; AVX-NEXT: movq %xmm1, %rax +; AVX-NEXT: movq %rax, U16(%rip) +; AVX-NEXT: movq %xmm4, %rax +; AVX-NEXT: movq %rax, U17(%rip) +; AVX-NEXT: movq %xmm2, %rax +; AVX-NEXT: movq %rax, U18(%rip) +; AVX-NEXT: popq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: popq %r12 +; AVX-NEXT: .cfi_def_cfa_offset 40 +; AVX-NEXT: popq %r13 +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: popq %r14 +; AVX-NEXT: .cfi_def_cfa_offset 24 +; AVX-NEXT: popq %r15 +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: popq %rbp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %0 = load i64, i64* @D0 %1 = load i64, i64* @D1 diff --git a/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll b/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll --- a/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll +++ b/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 | FileCheck %s +; RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+avx | FileCheck --check-prefix=AVX %s ; End-to-end check that Spill2Reg works with 8-bit registers. 
@@ -130,6 +131,90 @@ ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq +; +; AVX-LABEL: _Z5spillv: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rbp +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: pushq %r15 +; AVX-NEXT: .cfi_def_cfa_offset 24 +; AVX-NEXT: pushq %r14 +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: pushq %r13 +; AVX-NEXT: .cfi_def_cfa_offset 40 +; AVX-NEXT: pushq %r12 +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: pushq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 56 +; AVX-NEXT: .cfi_offset %rbx, -56 +; AVX-NEXT: .cfi_offset %r12, -48 +; AVX-NEXT: .cfi_offset %r13, -40 +; AVX-NEXT: .cfi_offset %r14, -32 +; AVX-NEXT: .cfi_offset %r15, -24 +; AVX-NEXT: .cfi_offset %rbp, -16 +; AVX-NEXT: movb D0(%rip), %al +; AVX-NEXT: vmovd %eax, %xmm3 +; AVX-NEXT: movb D1(%rip), %cl +; AVX-NEXT: movb D2(%rip), %dl +; AVX-NEXT: movb D3(%rip), %sil +; AVX-NEXT: movb D4(%rip), %dil +; AVX-NEXT: movb D5(%rip), %r8b +; AVX-NEXT: movb D6(%rip), %r9b +; AVX-NEXT: movb D7(%rip), %r10b +; AVX-NEXT: movb D8(%rip), %r11b +; AVX-NEXT: movb D9(%rip), %bl +; AVX-NEXT: movb D10(%rip), %bpl +; AVX-NEXT: movb D11(%rip), %r14b +; AVX-NEXT: movb D12(%rip), %r15b +; AVX-NEXT: movb D13(%rip), %r12b +; AVX-NEXT: movb D14(%rip), %r13b +; AVX-NEXT: movb D15(%rip), %al +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: movb D16(%rip), %al +; AVX-NEXT: vmovd %eax, %xmm1 +; AVX-NEXT: movb D17(%rip), %al +; AVX-NEXT: vmovd %eax, %xmm4 +; AVX-NEXT: movb D18(%rip), %al +; AVX-NEXT: vmovd %eax, %xmm2 +; AVX-NEXT: #APP +; AVX-NEXT: #NO_APP +; AVX-NEXT: vmovd %xmm3, %eax +; AVX-NEXT: movb %al, U0(%rip) +; AVX-NEXT: movb %cl, U1(%rip) +; AVX-NEXT: movb %dl, U2(%rip) +; AVX-NEXT: movb %sil, U3(%rip) +; AVX-NEXT: movb %dil, U4(%rip) +; AVX-NEXT: movb %r8b, U5(%rip) +; AVX-NEXT: movb %r9b, U6(%rip) +; AVX-NEXT: movb %r10b, U7(%rip) +; AVX-NEXT: movb %r11b, U8(%rip) +; AVX-NEXT: movb %bl, U9(%rip) +; AVX-NEXT: movb %bpl, U10(%rip) +; AVX-NEXT: movb %r14b, U11(%rip) +; AVX-NEXT: movb %r15b, U12(%rip) +; AVX-NEXT: movb %r12b, U13(%rip) +; AVX-NEXT: movb %r13b, U14(%rip) +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: movb %al, U15(%rip) +; AVX-NEXT: vmovd %xmm1, %eax +; AVX-NEXT: movb %al, U16(%rip) +; AVX-NEXT: vmovd %xmm4, %eax +; AVX-NEXT: movb %al, U17(%rip) +; AVX-NEXT: vmovd %xmm2, %eax +; AVX-NEXT: movb %al, U18(%rip) +; AVX-NEXT: popq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: popq %r12 +; AVX-NEXT: .cfi_def_cfa_offset 40 +; AVX-NEXT: popq %r13 +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: popq %r14 +; AVX-NEXT: .cfi_def_cfa_offset 24 +; AVX-NEXT: popq %r15 +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: popq %rbp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq entry: %0 = load i8, i8* @D0 %1 = load i8, i8* @D1 diff --git a/llvm/test/CodeGen/X86/spill2reg_simple_1_16bit.mir b/llvm/test/CodeGen/X86/spill2reg_simple_1_16bit.mir --- a/llvm/test/CodeGen/X86/spill2reg_simple_1_16bit.mir +++ b/llvm/test/CodeGen/X86/spill2reg_simple_1_16bit.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck %s +# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+avx --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck --check-prefix=AVX %s # Simple test with a single 16-bit spill-reload pair: # spill stack.0 @@ -30,6 
+31,12 @@ ; CHECK-NEXT: $eax = MOVPDI2DIrr $xmm0 ; CHECK-NEXT: MOV16mr $rip, 1, $noreg, @U0, $noreg, killed renamable $ax :: (store (s16) into @U0) ; CHECK-NEXT: RET 0 + ; AVX-LABEL: name: func + ; AVX: $ax = MOV16rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s16) from @D0) + ; AVX-NEXT: $xmm0 = VMOVDI2PDIZrr $eax + ; AVX-NEXT: $eax = VMOVPDI2DIZrr $xmm0 + ; AVX-NEXT: MOV16mr $rip, 1, $noreg, @U0, $noreg, killed renamable $ax :: (store (s16) into @U0) + ; AVX-NEXT: RET 0 $ax = MOV16rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s16) from @D0) MOV16mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $ax :: (store (s16) into %stack.0) ; reload diff --git a/llvm/test/CodeGen/X86/spill2reg_simple_1_32bit.mir b/llvm/test/CodeGen/X86/spill2reg_simple_1_32bit.mir --- a/llvm/test/CodeGen/X86/spill2reg_simple_1_32bit.mir +++ b/llvm/test/CodeGen/X86/spill2reg_simple_1_32bit.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck %s # RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=-sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck --check-prefix=NOSSE %s +# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+avx --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck --check-prefix=AVX %s # Simple test with a single spill-reload pair (32-bit version): # spill stack.0 @@ -24,6 +25,13 @@ body: | + ; AVX-LABEL: bb.0: + ; AVX-NEXT: $eax = MOV32rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s32) from @D0) + ; AVX-NEXT: $xmm0 = VMOVDI2PDIZrr $eax + ; AVX-NEXT: $eax = VMOVPDI2DIZrr $xmm0 + ; AVX-NEXT: MOV32mr $rip, 1, $noreg, @U0, $noreg, killed renamable $eax :: (store (s32) into @U0) + ; AVX-NEXT: RET 0 + bb.0: ; spill ; CHECK-LABEL: name: func diff --git a/llvm/test/CodeGen/X86/spill2reg_simple_1_64bit.mir b/llvm/test/CodeGen/X86/spill2reg_simple_1_64bit.mir --- a/llvm/test/CodeGen/X86/spill2reg_simple_1_64bit.mir +++ b/llvm/test/CodeGen/X86/spill2reg_simple_1_64bit.mir @@ -1,6 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck %s # RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=-sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck --check-prefix=NOSSE %s +# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+avx --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck --check-prefix=AVX %s +# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -spill2reg-no-avx -mattr=+avx --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck --check-prefix=NO_AVX_FLAG %s # Simple test with a single spill-reload pair (64-bit version): # spill stack.0 @@ -38,6 +40,18 @@ ; NOSSE-NEXT: $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) ; NOSSE-NEXT: MOV64mr $rip, 1, $noreg, @U0, $noreg, killed renamable $rax :: (store (s64) into @U0) ; NOSSE-NEXT: RET 0 + ; AVX-LABEL: name: func + ; AVX: $rax = 
MOV64rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s64) from @D0) + ; AVX-NEXT: $xmm0 = VMOV64toPQIZrr $rax + ; AVX-NEXT: $rax = VMOVPQIto64Zrr $xmm0 + ; AVX-NEXT: MOV64mr $rip, 1, $noreg, @U0, $noreg, killed renamable $rax :: (store (s64) into @U0) + ; AVX-NEXT: RET 0 + ; NO_AVX_FLAG-LABEL: name: func + ; NO_AVX_FLAG: $rax = MOV64rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s64) from @D0) + ; NO_AVX_FLAG-NEXT: $xmm0 = MOV64toPQIrr $rax + ; NO_AVX_FLAG-NEXT: $rax = MOVPQIto64rr $xmm0 + ; NO_AVX_FLAG-NEXT: MOV64mr $rip, 1, $noreg, @U0, $noreg, killed renamable $rax :: (store (s64) into @U0) + ; NO_AVX_FLAG-NEXT: RET 0 $rax = MOV64rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s64) from @D0) MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.0) ; reload diff --git a/llvm/test/CodeGen/X86/spill2reg_simple_1_8bit.mir b/llvm/test/CodeGen/X86/spill2reg_simple_1_8bit.mir --- a/llvm/test/CodeGen/X86/spill2reg_simple_1_8bit.mir +++ b/llvm/test/CodeGen/X86/spill2reg_simple_1_8bit.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck %s +# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+avx --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck --check-prefix=AVX %s # Simple test with a single 8-bit spill-reload pair: # spill stack.0 @@ -30,6 +31,12 @@ ; CHECK-NEXT: $eax = MOVPDI2DIrr $xmm0 ; CHECK-NEXT: MOV8mr $rip, 1, $noreg, @U0, $noreg, killed renamable $al :: (store (s8) into @U0) ; CHECK-NEXT: RET 0 + ; AVX-LABEL: name: func + ; AVX: $al = MOV8rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s8) from @D0) + ; AVX-NEXT: $xmm0 = VMOVDI2PDIZrr $eax + ; AVX-NEXT: $eax = VMOVPDI2DIZrr $xmm0 + ; AVX-NEXT: MOV8mr $rip, 1, $noreg, @U0, $noreg, killed renamable $al :: (store (s8) into @U0) + ; AVX-NEXT: RET 0 $al = MOV8rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s8) from @D0) MOV8mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $al :: (store (s8) into %stack.0) ; reload
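For reference, below is a minimal standalone sketch of the opcode-selection behaviour this patch adds in getInsertOrExtractOpcode(). It is an illustration only, not part of the patch: the string names stand in for the real X86 opcode enumerators, and the UseAVX flag models useAVX() (hasAVX() and not -spill2reg-no-avx).

#include <initializer_list>
#include <iostream>
#include <string>

// Mirrors the patch's dispatch: when AVX is usable, the EVEX-encoded VMOV*Z
// opcodes (paired with the VR128X class) are chosen; otherwise the SSE MOV*
// opcodes are kept.
static std::string getInsertOrExtractOpcodeName(unsigned Bits, bool Insert,
                                                bool UseAVX) {
  switch (Bits) {
  case 8:
  case 16:
  case 32: // 8/16-bit values are moved through a 32-bit movd.
    if (UseAVX)
      return Insert ? "VMOVDI2PDIZrr" : "VMOVPDI2DIZrr";
    return Insert ? "MOVDI2PDIrr" : "MOVPDI2DIrr";
  case 64:
    if (UseAVX)
      return Insert ? "VMOV64toPQIZrr" : "VMOVPQIto64Zrr";
    return Insert ? "MOV64toPQIrr" : "MOVPQIto64rr";
  default:
    return "<unsupported>";
  }
}

int main() {
  // Print the insert/extract opcode chosen for every supported width, with
  // and without AVX, matching the MIR seen in the tests above.
  for (unsigned Bits : {8u, 16u, 32u, 64u})
    for (bool UseAVX : {false, true})
      std::cout << Bits << "-bit, AVX=" << UseAVX << ": insert "
                << getInsertOrExtractOpcodeName(Bits, /*Insert=*/true, UseAVX)
                << ", extract "
                << getInsertOrExtractOpcodeName(Bits, /*Insert=*/false, UseAVX)
                << "\n";
  return 0;
}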