diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -1235,12 +1235,21 @@ const MachineFrameInfo &MFI = MF.getFrameInfo(); Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment. Align StackAlign = getStackAlign(); - if (MF.getFunction().hasFnAttribute("stackrealign")) { + bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign"); + if (HasRealign) { if (MFI.hasCalls()) MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; else if (MaxAlign < SlotSize) MaxAlign = Align(SlotSize); } + + // The 32-bit x86_intrcc convention requires a 16-byte aligned stack. + if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) { + if (HasRealign) + MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16); + else + MaxAlign = Align(16); + } return MaxAlign.value(); } diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h --- a/llvm/lib/Target/X86/X86RegisterInfo.h +++ b/llvm/lib/Target/X86/X86RegisterInfo.h @@ -133,6 +133,8 @@ bool canRealignStack(const MachineFunction &MF) const override; + bool shouldRealignStack(const MachineFunction &MF) const override; + void eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned FIOperandNum, Register BaseReg, int FIOffset) const; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -743,6 +743,14 @@ return true; } +// Also request realignment for 32-bit x86_intrcc functions. +bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const { + if (TargetRegisterInfo::shouldRealignStack(MF)) + return true; + + return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR; +} + // tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction // of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'. 
// TODO: In this case we should be really trying first to entirely eliminate diff --git a/llvm/test/CodeGen/X86/x86-32-intrcc.ll b/llvm/test/CodeGen/X86/x86-32-intrcc.ll --- a/llvm/test/CodeGen/X86/x86-32-intrcc.ll +++ b/llvm/test/CodeGen/X86/x86-32-intrcc.ll @@ -11,13 +11,18 @@ ; No stack adjustment if declared with no error code define x86_intrcc void @test_isr_no_ecode(ptr byval(%struct.interrupt_frame) %frame) { ; CHECK-LABEL: test_isr_no_ecode: + ; CHECK: pushl %ebp + ; CHECK: movl %esp, %ebp ; CHECK: pushl %eax - ; CHECK: movl 12(%esp), %eax + ; CHECK: andl $-16, %esp + ; CHECK: movl 12(%ebp), %eax + ; CHECK: leal -4(%ebp), %esp ; CHECK: popl %eax + ; CHECK: popl %ebp ; CHECK: iretl ; CHECK0-LABEL: test_isr_no_ecode: ; CHECK0: pushl %eax - ; CHECK0: leal 4(%esp), %eax + ; CHECK0: leal 4(%ebp), %eax ; CHECK0: movl 8(%eax), %eax ; CHECK0: popl %eax ; CHECK0: iretl @@ -31,22 +36,29 @@ ; before return, popping the error code. define x86_intrcc void @test_isr_ecode(ptr byval(%struct.interrupt_frame) %frame, i32 %ecode) { ; CHECK-LABEL: test_isr_ecode + ; CHECK: pushl %ebp + ; CHECK: movl %esp, %ebp ; CHECK: pushl %ecx ; CHECK: pushl %eax - ; CHECK: movl 8(%esp), %eax - ; CHECK: movl 20(%esp), %ecx + ; CHECK: andl $-16, %esp + ; CHECK: movl 4(%ebp), %eax + ; CHECK: movl 16(%ebp), %ecx + ; CHECK: leal -8(%ebp), %esp ; CHECK: popl %eax ; CHECK: popl %ecx + ; CHECK: popl %ebp ; CHECK: addl $4, %esp ; CHECK: iretl ; CHECK0-LABEL: test_isr_ecode ; CHECK0: pushl %ecx ; CHECK0: pushl %eax - ; CHECK0: movl 8(%esp), %ecx - ; CHECK0: leal 12(%esp), %eax + ; CHECK0: movl 4(%ebp), %ecx + ; CHECK0: leal 8(%ebp), %eax ; CHECK0: movl 8(%eax), %eax + ; CHECK0: leal -8(%ebp), %esp ; CHECK0: popl %eax ; CHECK0: popl %ecx + ; CHECK0: popl %ebp ; CHECK0: addl $4, %esp ; CHECK0: iretl %pflags = getelementptr inbounds %struct.interrupt_frame, ptr %frame, i32 0, i32 2 @@ -88,7 +100,7 @@ ; CHECK-DAG: fld1 ; CHECK: faddp ; CHECK-NEXT: fstpt f80 - ; CHECK-NEXT: iretl + ; CHECK: iretl entry: 
%ld = load x86_fp80, ptr @f80, align 4 %add = fadd x86_fp80 %ld, 0xK3FFF8000000000000000 @@ -156,4 +168,45 @@ ret void } +; Disabling dynamic realignment with attributes should work +define x86_intrcc void @test_isr_no_realign(ptr byval(%struct.interrupt_frame) %frame) #1 { + ; CHECK-LABEL: test_isr_no_realign: + ; CHECK: pushl %eax + ; CHECK: movl 12(%esp), %eax + ; CHECK: popl %eax + ; CHECK: iretl + ; CHECK0-LABEL: test_isr_no_realign: + ; CHECK0: pushl %eax + ; CHECK0: leal 4(%esp), %eax + ; CHECK0: movl 8(%eax), %eax + ; CHECK0: popl %eax + ; CHECK0: iretl + %pflags = getelementptr inbounds %struct.interrupt_frame, ptr %frame, i32 0, i32 2 + %flags = load i32, ptr %pflags, align 4 + call void asm sideeffect "", "r"(i32 %flags) + ret void +} + +; The stackrealign attribute should work, and the function's alignment +; should be respected over the default 16-byte alignment required by the calling +; convention. +define x86_intrcc void @test_isr_realign(ptr byval(%struct.interrupt_frame) %frame, i32 %ecode) #2 { + ; CHECK-LABEL: test_isr_realign: + ; CHECK: pushl %ebp + ; CHECK: movl %esp, %ebp + ; CHECK: andl $-32, %esp + ; CHECK: iretl + ; CHECK0-LABEL: test_isr_realign: + ; CHECK0: pushl %ebp + ; CHECK0: movl %esp, %ebp + ; CHECK0: andl $-32, %esp + ; CHECK0: iretl + %ecode.stack = alloca i32, align 32 + store i32 %ecode, ptr %ecode.stack + ret void +} + + attributes #0 = { nounwind "frame-pointer"="all" } +attributes #1 = { "no-realign-stack" } +attributes #2 = { "stackrealign" } diff --git a/llvm/test/CodeGen/X86/x86-interrupt_cc.ll b/llvm/test/CodeGen/X86/x86-interrupt_cc.ll --- a/llvm/test/CodeGen/X86/x86-interrupt_cc.ll +++ b/llvm/test/CodeGen/X86/x86-interrupt_cc.ll @@ -506,50 +506,52 @@ ; ; CHECK32-KNL-LABEL: foo: ; CHECK32-KNL: ## %bb.0: -; CHECK32-KNL-NEXT: pushl %edx ## encoding: [0x52] +; CHECK32-KNL-NEXT: pushl %ebp ## encoding: [0x55] ; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-KNL-NEXT: .cfi_offset %ebp, -8 +; CHECK32-KNL-NEXT: movl %esp, 
%ebp ## encoding: [0x89,0xe5] +; CHECK32-KNL-NEXT: .cfi_def_cfa_register %ebp +; CHECK32-KNL-NEXT: pushl %edx ## encoding: [0x52] ; CHECK32-KNL-NEXT: pushl %ecx ## encoding: [0x51] -; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 12 ; CHECK32-KNL-NEXT: pushl %eax ## encoding: [0x50] -; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 16 +; CHECK32-KNL-NEXT: andl $-16, %esp ## encoding: [0x83,0xe4,0xf0] ; CHECK32-KNL-NEXT: subl $560, %esp ## encoding: [0x81,0xec,0x30,0x02,0x00,0x00] ; CHECK32-KNL-NEXT: ## imm = 0x230 ; CHECK32-KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xbc,0x24,0x2e,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x7d,0xf2] ; CHECK32-KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xb4,0x24,0x2c,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x75,0xf0] ; CHECK32-KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xac,0x24,0x2a,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x6d,0xee] ; CHECK32-KNL-NEXT: kmovw %k4, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xa4,0x24,0x28,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x65,0xec] ; CHECK32-KNL-NEXT: kmovw %k3, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x9c,0x24,0x26,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x5d,0xea] ; CHECK32-KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x94,0x24,0x24,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x55,0xe8] ; CHECK32-KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x8c,0x24,0x22,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: 
[0xc5,0xf8,0x91,0x4d,0xe6] ; CHECK32-KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x84,0x24,0x20,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x45,0xe4] ; CHECK32-KNL-NEXT: vmovups %zmm7, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbd,0x88,0xff,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups %zmm6, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xb5,0x48,0xff,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups %zmm5, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xad,0x08,0xff,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups %zmm4, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xa5,0xc8,0xfe,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups %zmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x9d,0x88,0xfe,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups %zmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x95,0x48,0xfe,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01] -; CHECK32-KNL-NEXT: vmovups %zmm0, (%esp) ## 64-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24] -; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 
576 -; CHECK32-KNL-NEXT: .cfi_offset %eax, -16 -; CHECK32-KNL-NEXT: .cfi_offset %ecx, -12 -; CHECK32-KNL-NEXT: .cfi_offset %edx, -8 +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x8d,0x08,0xfe,0xff,0xff] +; CHECK32-KNL-NEXT: vmovups %zmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x85,0xc8,0xfd,0xff,0xff] +; CHECK32-KNL-NEXT: .cfi_offset %eax, -20 +; CHECK32-KNL-NEXT: .cfi_offset %ecx, -16 +; CHECK32-KNL-NEXT: .cfi_offset %edx, -12 ; CHECK32-KNL-NEXT: .cfi_offset %xmm0, -576 ; CHECK32-KNL-NEXT: .cfi_offset %xmm1, -512 ; CHECK32-KNL-NEXT: .cfi_offset %xmm2, -448 @@ -558,102 +560,104 @@ ; CHECK32-KNL-NEXT: .cfi_offset %xmm5, -256 ; CHECK32-KNL-NEXT: .cfi_offset %xmm6, -192 ; CHECK32-KNL-NEXT: .cfi_offset %xmm7, -128 -; CHECK32-KNL-NEXT: .cfi_offset %k0, -32 -; CHECK32-KNL-NEXT: .cfi_offset %k1, -30 -; CHECK32-KNL-NEXT: .cfi_offset %k2, -28 -; CHECK32-KNL-NEXT: .cfi_offset %k3, -26 -; CHECK32-KNL-NEXT: .cfi_offset %k4, -24 -; CHECK32-KNL-NEXT: .cfi_offset %k5, -22 -; CHECK32-KNL-NEXT: .cfi_offset %k6, -20 -; CHECK32-KNL-NEXT: .cfi_offset %k7, -18 +; CHECK32-KNL-NEXT: .cfi_offset %k0, -36 +; CHECK32-KNL-NEXT: .cfi_offset %k1, -34 +; CHECK32-KNL-NEXT: .cfi_offset %k2, -32 +; CHECK32-KNL-NEXT: .cfi_offset %k3, -30 +; CHECK32-KNL-NEXT: .cfi_offset %k4, -28 +; CHECK32-KNL-NEXT: .cfi_offset %k5, -26 +; CHECK32-KNL-NEXT: .cfi_offset %k6, -24 +; CHECK32-KNL-NEXT: .cfi_offset %k7, -22 ; CHECK32-KNL-NEXT: cld ## encoding: [0xfc] ; CHECK32-KNL-NEXT: calll _bar ## encoding: [0xe8,A,A,A,A] ; CHECK32-KNL-NEXT: ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4 -; CHECK32-KNL-NEXT: vmovups (%esp), %zmm0 ## 64-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24] +; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm0 ## 64-byte Reload +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x85,0xc8,0xfd,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups 
{{[-0-9]+}}(%e{{[sb]}}p), %zmm1 ## 64-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x8d,0x08,0xfe,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm2 ## 64-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x95,0x48,0xfe,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm3 ## 64-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x9d,0x88,0xfe,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm4 ## 64-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xa5,0xc8,0xfe,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm5 ## 64-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xad,0x08,0xff,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm6 ## 64-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xb5,0x48,0xff,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm7 ## 64-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xbd,0x88,0xff,0xff,0xff] ; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 ## 2-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x84,0x24,0x20,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x45,0xe4] ; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload -; CHECK32-KNL-NEXT: ## encoding: 
[0xc5,0xf8,0x90,0x8c,0x24,0x22,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x4d,0xe6] ; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 2-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x94,0x24,0x24,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x55,0xe8] ; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k3 ## 2-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x9c,0x24,0x26,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x5d,0xea] ; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k4 ## 2-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xa4,0x24,0x28,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x65,0xec] ; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xac,0x24,0x2a,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x6d,0xee] ; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xb4,0x24,0x2c,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x75,0xf0] ; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xbc,0x24,0x2e,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: addl $560, %esp ## encoding: [0x81,0xc4,0x30,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: ## imm = 0x230 +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x7d,0xf2] +; CHECK32-KNL-NEXT: leal -12(%ebp), %esp ## encoding: [0x8d,0x65,0xf4] ; CHECK32-KNL-NEXT: popl %eax ## encoding: [0x58] ; CHECK32-KNL-NEXT: popl %ecx ## encoding: [0x59] ; CHECK32-KNL-NEXT: popl %edx ## encoding: [0x5a] +; CHECK32-KNL-NEXT: popl %ebp ## encoding: [0x5d] ; CHECK32-KNL-NEXT: iretl ## encoding: [0xcf] ; ; CHECK32-SKX-LABEL: foo: ; CHECK32-SKX: ## %bb.0: -; CHECK32-SKX-NEXT: pushl %edx ## encoding: [0x52] +; CHECK32-SKX-NEXT: pushl %ebp ## encoding: [0x55] ; 
CHECK32-SKX-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-SKX-NEXT: .cfi_offset %ebp, -8 +; CHECK32-SKX-NEXT: movl %esp, %ebp ## encoding: [0x89,0xe5] +; CHECK32-SKX-NEXT: .cfi_def_cfa_register %ebp +; CHECK32-SKX-NEXT: pushl %edx ## encoding: [0x52] ; CHECK32-SKX-NEXT: pushl %ecx ## encoding: [0x51] -; CHECK32-SKX-NEXT: .cfi_def_cfa_offset 12 ; CHECK32-SKX-NEXT: pushl %eax ## encoding: [0x50] -; CHECK32-SKX-NEXT: .cfi_def_cfa_offset 16 +; CHECK32-SKX-NEXT: andl $-16, %esp ## encoding: [0x83,0xe4,0xf0] ; CHECK32-SKX-NEXT: subl $624, %esp ## encoding: [0x81,0xec,0x70,0x02,0x00,0x00] ; CHECK32-SKX-NEXT: ## imm = 0x270 ; CHECK32-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x7d,0xe8] ; CHECK32-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xb4,0x24,0x60,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x75,0xe0] ; CHECK32-SKX-NEXT: kmovq %k5, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xac,0x24,0x58,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x6d,0xd8] ; CHECK32-SKX-NEXT: kmovq %k4, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xa4,0x24,0x50,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x65,0xd0] ; CHECK32-SKX-NEXT: kmovq %k3, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x9c,0x24,0x48,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x5d,0xc8] ; CHECK32-SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x94,0x24,0x40,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x55,0xc0] ; CHECK32-SKX-NEXT: kmovq %k1, 
{{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x8c,0x24,0x38,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x4d,0xb8] ; CHECK32-SKX-NEXT: kmovq %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x45,0xb0] ; CHECK32-SKX-NEXT: vmovups %zmm7, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbd,0x48,0xff,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups %zmm6, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xb5,0x08,0xff,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups %zmm5, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xad,0xc8,0xfe,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups %zmm4, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xa5,0x88,0xfe,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups %zmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x9d,0x48,0xfe,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups %zmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x95,0x08,0xfe,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-SKX-NEXT: ## encoding: 
[0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01] -; CHECK32-SKX-NEXT: vmovups %zmm0, (%esp) ## 64-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24] -; CHECK32-SKX-NEXT: .cfi_def_cfa_offset 640 -; CHECK32-SKX-NEXT: .cfi_offset %eax, -16 -; CHECK32-SKX-NEXT: .cfi_offset %ecx, -12 -; CHECK32-SKX-NEXT: .cfi_offset %edx, -8 +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x8d,0xc8,0xfd,0xff,0xff] +; CHECK32-SKX-NEXT: vmovups %zmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x85,0x88,0xfd,0xff,0xff] +; CHECK32-SKX-NEXT: .cfi_offset %eax, -20 +; CHECK32-SKX-NEXT: .cfi_offset %ecx, -16 +; CHECK32-SKX-NEXT: .cfi_offset %edx, -12 ; CHECK32-SKX-NEXT: .cfi_offset %xmm0, -640 ; CHECK32-SKX-NEXT: .cfi_offset %xmm1, -576 ; CHECK32-SKX-NEXT: .cfi_offset %xmm2, -512 @@ -662,55 +666,55 @@ ; CHECK32-SKX-NEXT: .cfi_offset %xmm5, -320 ; CHECK32-SKX-NEXT: .cfi_offset %xmm6, -256 ; CHECK32-SKX-NEXT: .cfi_offset %xmm7, -192 -; CHECK32-SKX-NEXT: .cfi_offset %k0, -80 -; CHECK32-SKX-NEXT: .cfi_offset %k1, -72 -; CHECK32-SKX-NEXT: .cfi_offset %k2, -64 -; CHECK32-SKX-NEXT: .cfi_offset %k3, -56 -; CHECK32-SKX-NEXT: .cfi_offset %k4, -48 -; CHECK32-SKX-NEXT: .cfi_offset %k5, -40 -; CHECK32-SKX-NEXT: .cfi_offset %k6, -32 -; CHECK32-SKX-NEXT: .cfi_offset %k7, -24 +; CHECK32-SKX-NEXT: .cfi_offset %k0, -88 +; CHECK32-SKX-NEXT: .cfi_offset %k1, -80 +; CHECK32-SKX-NEXT: .cfi_offset %k2, -72 +; CHECK32-SKX-NEXT: .cfi_offset %k3, -64 +; CHECK32-SKX-NEXT: .cfi_offset %k4, -56 +; CHECK32-SKX-NEXT: .cfi_offset %k5, -48 +; CHECK32-SKX-NEXT: .cfi_offset %k6, -40 +; CHECK32-SKX-NEXT: .cfi_offset %k7, -32 ; CHECK32-SKX-NEXT: cld ## encoding: [0xfc] ; CHECK32-SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK32-SKX-NEXT: calll _bar ## encoding: [0xe8,A,A,A,A] ; CHECK32-SKX-NEXT: ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4 -; CHECK32-SKX-NEXT: vmovups (%esp), %zmm0 ## 64-byte Reload -; 
CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24] +; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm0 ## 64-byte Reload +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x85,0x88,0xfd,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm1 ## 64-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x8d,0xc8,0xfd,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm2 ## 64-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x95,0x08,0xfe,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm3 ## 64-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x9d,0x48,0xfe,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm4 ## 64-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xa5,0x88,0xfe,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm5 ## 64-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xad,0xc8,0xfe,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm6 ## 64-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xb5,0x08,0xff,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm7 ## 64-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xbd,0x48,0xff,0xff,0xff] ; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k0 ## 8-byte Reload -; 
CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x45,0xb0] ; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 8-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x8c,0x24,0x38,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x4d,0xb8] ; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 8-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x94,0x24,0x40,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x55,0xc0] ; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k3 ## 8-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x9c,0x24,0x48,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x5d,0xc8] ; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k4 ## 8-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xa4,0x24,0x50,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x65,0xd0] ; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 8-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xac,0x24,0x58,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x6d,0xd8] ; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 8-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xb4,0x24,0x60,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x75,0xe0] ; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xbc,0x24,0x68,0x02,0x00,0x00] -; CHECK32-SKX-NEXT: addl $624, %esp ## encoding: [0x81,0xc4,0x70,0x02,0x00,0x00] -; CHECK32-SKX-NEXT: ## imm = 0x270 +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x7d,0xe8] +; CHECK32-SKX-NEXT: leal -12(%ebp), %esp ## encoding: [0x8d,0x65,0xf4] ; CHECK32-SKX-NEXT: popl %eax ## encoding: [0x58] ; CHECK32-SKX-NEXT: popl %ecx ## 
encoding: [0x59] ; CHECK32-SKX-NEXT: popl %edx ## encoding: [0x5a] +; CHECK32-SKX-NEXT: popl %ebp ## encoding: [0x5d] ; CHECK32-SKX-NEXT: iretl ## encoding: [0xcf] call void @bar() ret void