diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3794,7 +3794,8 @@
                                        const TargetRegisterClass *RC,
                                        const TargetRegisterInfo *TRI) const {
   const MachineFunction &MF = *MBB.getParent();
-  assert(MF.getFrameInfo().getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
          "Stack slot too small for store");
   if (RC->getID() == X86::TILERegClassID) {
     unsigned Opc = X86::TILESTORED;
@@ -3812,7 +3813,7 @@
     unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
     bool isAligned =
         (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
-        RI.canRealignStack(MF);
+        (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
     unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
     addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
         .addReg(SrcReg, getKillRegState(isKill));
@@ -3838,10 +3839,11 @@
     MO.setIsKill(true);
   } else {
     const MachineFunction &MF = *MBB.getParent();
+    const MachineFrameInfo &MFI = MF.getFrameInfo();
     unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
     bool isAligned =
         (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
-        RI.canRealignStack(MF);
+        (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
     unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
     addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
                       FrameIdx);
diff --git a/llvm/test/CodeGen/X86/anyregcc.ll b/llvm/test/CodeGen/X86/anyregcc.ll
--- a/llvm/test/CodeGen/X86/anyregcc.ll
+++ b/llvm/test/CodeGen/X86/anyregcc.ll
@@ -513,22 +513,22 @@
 ;AVX: pushq %rdx
 ;AVX: pushq %rcx
 ;AVX: pushq %rbx
-;AVX: vmovaps %ymm15
-;AVX-NEXT: vmovaps %ymm14
-;AVX-NEXT: vmovaps %ymm13
-;AVX-NEXT: vmovaps %ymm12
-;AVX-NEXT: vmovaps %ymm11
-;AVX-NEXT: vmovaps %ymm10
-;AVX-NEXT: vmovaps %ymm9
-;AVX-NEXT: vmovaps %ymm8
-;AVX-NEXT: vmovaps %ymm7
-;AVX-NEXT: vmovaps %ymm6
-;AVX-NEXT: vmovaps %ymm5
-;AVX-NEXT: vmovaps %ymm4
-;AVX-NEXT: vmovaps %ymm3
-;AVX-NEXT: vmovaps %ymm2
-;AVX-NEXT: vmovaps %ymm1
-;AVX-NEXT: vmovaps %ymm0
+;AVX: vmovups %ymm15
+;AVX-NEXT: vmovups %ymm14
+;AVX-NEXT: vmovups %ymm13
+;AVX-NEXT: vmovups %ymm12
+;AVX-NEXT: vmovups %ymm11
+;AVX-NEXT: vmovups %ymm10
+;AVX-NEXT: vmovups %ymm9
+;AVX-NEXT: vmovups %ymm8
+;AVX-NEXT: vmovups %ymm7
+;AVX-NEXT: vmovups %ymm6
+;AVX-NEXT: vmovups %ymm5
+;AVX-NEXT: vmovups %ymm4
+;AVX-NEXT: vmovups %ymm3
+;AVX-NEXT: vmovups %ymm2
+;AVX-NEXT: vmovups %ymm1
+;AVX-NEXT: vmovups %ymm0
   call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"()
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/avx-intel-ocl.ll b/llvm/test/CodeGen/X86/avx-intel-ocl.ll
--- a/llvm/test/CodeGen/X86/avx-intel-ocl.ll
+++ b/llvm/test/CodeGen/X86/avx-intel-ocl.ll
@@ -67,27 +67,27 @@
 ; test calling conventions - prolog and epilog
 ; WIN64-LABEL: test_prolog_epilog
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
 ; WIN64: call
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload

 ; X64-LABEL: test_prolog_epilog
 ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Spill
diff --git a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
--- a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
+++ b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
@@ -190,44 +190,44 @@
 ; WIN64-KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; WIN64-KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; WIN64-KNL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; WIN64-KNL-NEXT: vmovaps %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-KNL-NEXT: vmovaps %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-KNL-NEXT: vmovaps %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-KNL-NEXT: vmovaps %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-KNL-NEXT: vmovaps %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-KNL-NEXT: vmovaps %zmm16, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-KNL-NEXT: vmovaps %zmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-KNL-NEXT: vmovaps %zmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-KNL-NEXT: vmovaps %zmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-KNL-NEXT: vmovaps %zmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-KNL-NEXT: vmovaps %zmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-KNL-NEXT: vmovaps %zmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-KNL-NEXT: vmovaps %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-KNL-NEXT: vmovaps %zmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-KNL-NEXT: vmovaps %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-KNL-NEXT: vmovaps %zmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-KNL-NEXT: vmovups %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-KNL-NEXT: vmovups %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-KNL-NEXT: vmovups %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-KNL-NEXT: vmovups %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-KNL-NEXT: vmovups %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-KNL-NEXT: vmovups %zmm16, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-KNL-NEXT: vmovups %zmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-KNL-NEXT: vmovups %zmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-KNL-NEXT: vmovups %zmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-KNL-NEXT: vmovups %zmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-KNL-NEXT: vmovups %zmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-KNL-NEXT: vmovups %zmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-KNL-NEXT: vmovups %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-KNL-NEXT: vmovups %zmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-KNL-NEXT: vmovups %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-KNL-NEXT: vmovups %zmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
 ; WIN64-KNL-NEXT: andq $-64, %rsp
 ; WIN64-KNL-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp)
 ; WIN64-KNL-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
 ; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN64-KNL-NEXT: callq func_float16
-; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm6 # 64-byte Reload
-; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 # 64-byte Reload
-; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm8 # 64-byte Reload
-; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm9 # 64-byte Reload
-; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm10 # 64-byte Reload
-; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm11 # 64-byte Reload
-; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm12 # 64-byte Reload
-; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm13 # 64-byte Reload
-; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm14 # 64-byte Reload
-; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm15 # 64-byte Reload
-; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm16 # 64-byte Reload
-; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 # 64-byte Reload
-; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 # 64-byte Reload
-; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 # 64-byte Reload
-; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 # 64-byte Reload
-; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 # 64-byte Reload
+; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm6 # 64-byte Reload
+; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 # 64-byte Reload
+; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm8 # 64-byte Reload
+; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm9 # 64-byte Reload
+; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm10 # 64-byte Reload
+; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm11 # 64-byte Reload
+; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm12 # 64-byte Reload
+; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm13 # 64-byte Reload
+; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm14 # 64-byte Reload
+; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm15 # 64-byte Reload
+; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm16 # 64-byte Reload
+; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 # 64-byte Reload
+; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 # 64-byte Reload
+; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 # 64-byte Reload
+; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 # 64-byte Reload
+; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 # 64-byte Reload
 ; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
 ; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
 ; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
@@ -245,44 +245,44 @@
 ; WIN64-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; WIN64-SKX-NEXT: kmovq %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; WIN64-SKX-NEXT: kmovq %k4, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm16, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm16, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
 ; WIN64-SKX-NEXT: andq $-64, %rsp
 ; WIN64-SKX-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp)
 ; WIN64-SKX-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
 ; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN64-SKX-NEXT: callq func_float16
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm6 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm8 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm9 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm10 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm11 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm12 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm13 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm14 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm15 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm16 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm6 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm8 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm9 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm10 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm11 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm12 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm13 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm14 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm15 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm16 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 # 64-byte Reload
 ; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 8-byte Reload
 ; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 8-byte Reload
 ; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 8-byte Reload
diff --git a/llvm/test/CodeGen/X86/x86-64-xmm-spill-unaligned.ll b/llvm/test/CodeGen/X86/x86-64-xmm-spill-unaligned.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/x86-64-xmm-spill-unaligned.ll
@@ -0,0 +1,15 @@
+; Make sure that when the stack may be misaligned on function entry, fixed frame
+; elements (here: XMM spills) are accessed using instructions that tolerate
+; unaligned access.
+;
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64 -mattr=+sse,+sse-unaligned-mem -stack-alignment=8 --frame-pointer=all < %s | FileCheck %s
+
+define dso_local preserve_allcc void @func() #0 {
+; CHECK-LABEL: func:
+; CHECK: movups %xmm0, -{{[0-9]+}}(%rbp)
+  call void asm sideeffect "", "~{xmm0},~{dirflag},~{fpsr},~{flags}"() #1
+; CHECK: movups -{{[0-9]+}}(%rbp), %xmm0
+  ret void
+}
+
+attributes #0 = { nounwind }
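
The alignment decision shared by the store and load paths above reduces to a single predicate. The following is a minimal standalone C++ sketch of that logic for reference only; the helper name useAlignedSpillOpcode and its flattened value parameters are illustrative and do not appear in the patch:

    #include <algorithm>
    #include <cstdint>

    // May an aligned move (vmovaps) be used for a spill slot? Either the ABI
    // stack alignment already covers the slot, or the slot lives in the part
    // of the frame the function realigns. Fixed objects sit at a constant
    // offset from the incoming stack pointer, so realigning the local frame
    // does not change their alignment; they must use unaligned moves
    // (vmovups) instead.
    static bool useAlignedSpillOpcode(uint32_t SpillSize, uint32_t StackAlign,
                                      bool CanRealignStack,
                                      bool IsFixedObject) {
      uint32_t Alignment = std::max<uint32_t>(SpillSize, 16);
      return StackAlign >= Alignment || (CanRealignStack && !IsFixedObject);
    }

With -stack-alignment=8 and --frame-pointer=all, a 16-byte XMM spill to a fixed frame slot fails both arms of the predicate, so getStoreRegOpcode/getLoadRegOpcode select movups, which is what the new test checks for.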