diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -1235,12 +1235,20 @@
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
   Align StackAlign = getStackAlign();
-  if (MF.getFunction().hasFnAttribute("stackrealign")) {
+  bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
+  if (HasRealign) {
     if (MFI.hasCalls())
       MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
     else if (MaxAlign < SlotSize)
       MaxAlign = Align(SlotSize);
   }
+
+  if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
+    if (HasRealign)
+      MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
+    else
+      MaxAlign = Align(16);
+  }
   return MaxAlign.value();
 }
 
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -133,6 +133,8 @@
 
   bool canRealignStack(const MachineFunction &MF) const override;
 
+  bool shouldRealignStack(const MachineFunction &MF) const override;
+
   void eliminateFrameIndex(MachineBasicBlock::iterator II,
                            unsigned FIOperandNum, Register BaseReg,
                            int FIOffset) const;
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -743,6 +743,13 @@
   return true;
 }
 
+bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
+  if (TargetRegisterInfo::shouldRealignStack(MF))
+    return true;
+
+  return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
+}
+
 // tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
 // of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
 // TODO: In this case we should be really trying first to entirely eliminate
diff --git a/llvm/test/CodeGen/X86/x86-32-intrcc.ll b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
--- a/llvm/test/CodeGen/X86/x86-32-intrcc.ll
+++ b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
@@ -9,63 +9,85 @@
 ; Spills eax, putting original esp at +4.
-; No stack adjustment if declared with no error code
+; Stack is dynamically realigned to 16 bytes, and %esp is then restored to %ebp - 4
+; With no error code, %esp is not incremented by 4 bytes before returning
 define x86_intrcc void @test_isr_no_ecode(ptr byval(%struct.interrupt_frame) %frame) nounwind {
 ; CHECK-LABEL: test_isr_no_ecode:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: movl %esp, %ebp
 ; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: andl $-16, %esp
 ; CHECK-NEXT: cld
-; CHECK-NEXT: movl 12(%esp), %eax
+; CHECK-NEXT: movl 12(%ebp), %eax
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: leal -4(%ebp), %esp
 ; CHECK-NEXT: popl %eax
+; CHECK-NEXT: popl %ebp
 ; CHECK-NEXT: iretl
 ;
 ; CHECK0-LABEL: test_isr_no_ecode:
 ; CHECK0: # %bb.0:
+; CHECK0-NEXT: pushl %ebp
+; CHECK0-NEXT: movl %esp, %ebp
 ; CHECK0-NEXT: pushl %eax
+; CHECK0-NEXT: andl $-16, %esp
 ; CHECK0-NEXT: cld
-; CHECK0-NEXT: leal 4(%esp), %eax
+; CHECK0-NEXT: leal 4(%ebp), %eax
 ; CHECK0-NEXT: movl 8(%eax), %eax
 ; CHECK0-NEXT: #APP
 ; CHECK0-NEXT: #NO_APP
+; CHECK0-NEXT: leal -4(%ebp), %esp
 ; CHECK0-NEXT: popl %eax
+; CHECK0-NEXT: popl %ebp
 ; CHECK0-NEXT: iretl
   %pflags = getelementptr inbounds %struct.interrupt_frame, ptr %frame, i32 0, i32 2
   %flags = load i32, ptr %pflags, align 4
   call void asm sideeffect "", "r"(i32 %flags)
   ret void
 }
 
-; Spills eax and ecx, putting original esp at +8. Stack is adjusted up another 4 bytes
-; before return, popping the error code.
+; Spills eax and ecx, putting original esp at +8.
+; Stack is dynamically realigned to 16 bytes, and %esp is then restored to %ebp - 8
+; The error code is popped by adding 4 to %esp before returning
 define x86_intrcc void @test_isr_ecode(ptr byval(%struct.interrupt_frame) %frame, i32 %ecode) nounwind {
 ; CHECK-LABEL: test_isr_ecode:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: movl %esp, %ebp
 ; CHECK-NEXT: pushl %ecx
 ; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: andl $-16, %esp
 ; CHECK-NEXT: cld
-; CHECK-NEXT: movl 8(%esp), %eax
-; CHECK-NEXT: movl 20(%esp), %ecx
+; CHECK-NEXT: movl 4(%ebp), %eax
+; CHECK-NEXT: movl 16(%ebp), %ecx
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: leal -8(%ebp), %esp
 ; CHECK-NEXT: popl %eax
 ; CHECK-NEXT: popl %ecx
+; CHECK-NEXT: popl %ebp
 ; CHECK-NEXT: addl $4, %esp
 ; CHECK-NEXT: iretl
 ;
 ; CHECK0-LABEL: test_isr_ecode:
 ; CHECK0: # %bb.0:
+; CHECK0-NEXT: pushl %ebp
+; CHECK0-NEXT: movl %esp, %ebp
 ; CHECK0-NEXT: pushl %ecx
 ; CHECK0-NEXT: pushl %eax
+; CHECK0-NEXT: andl $-16, %esp
 ; CHECK0-NEXT: cld
-; CHECK0-NEXT: movl 8(%esp), %ecx
-; CHECK0-NEXT: leal 12(%esp), %eax
+; CHECK0-NEXT: movl 4(%ebp), %ecx
+; CHECK0-NEXT: leal 8(%ebp), %eax
 ; CHECK0-NEXT: movl 8(%eax), %eax
 ; CHECK0-NEXT: #APP
 ; CHECK0-NEXT: #NO_APP
+; CHECK0-NEXT: leal -8(%ebp), %esp
 ; CHECK0-NEXT: popl %eax
 ; CHECK0-NEXT: popl %ecx
+; CHECK0-NEXT: popl %ebp
 ; CHECK0-NEXT: addl $4, %esp
 ; CHECK0-NEXT: iretl
   %pflags = getelementptr inbounds %struct.interrupt_frame, ptr %frame, i32 0, i32 2
@@ -79,13 +101,18 @@
 ; CHECK-LABEL: test_isr_clobbers:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: movl %esp, %ebp
+; CHECK-NEXT: pushl %ecx
 ; CHECK-NEXT: pushl %ebx
 ; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: andl $-16, %esp
 ; CHECK-NEXT: cld
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: leal -12(%ebp), %esp
 ; CHECK-NEXT: popl %eax
 ; CHECK-NEXT: popl %ebx
+; CHECK-NEXT: popl %ecx
 ; CHECK-NEXT: popl %ebp
 ; CHECK-NEXT: addl $4, %esp
 ; CHECK-NEXT: iretl
@@ -93,17 +120,22 @@
 ; CHECK0-LABEL: test_isr_clobbers:
 ; CHECK0: # %bb.0:
 ; CHECK0-NEXT: pushl %ebp
+; CHECK0-NEXT: movl %esp, %ebp
+; CHECK0-NEXT: pushl %ecx
 ; CHECK0-NEXT: pushl %ebx
 ; CHECK0-NEXT: pushl %eax
+; CHECK0-NEXT: andl $-16, %esp
 ; CHECK0-NEXT: cld
 ; CHECK0-NEXT: #APP
 ; CHECK0-NEXT: #NO_APP
+; CHECK0-NEXT: leal -12(%ebp), %esp
 ; CHECK0-NEXT: popl %eax
 ; CHECK0-NEXT: popl %ebx
+; CHECK0-NEXT: popl %ecx
 ; CHECK0-NEXT: popl %ebp
 ; CHECK0-NEXT: addl $4, %esp
 ; CHECK0-NEXT: iretl
-  call void asm sideeffect "", "~{eax},~{ebx},~{ebp}"()
+  call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{ebp}"()
   ret void
 }
 
@@ -113,20 +145,30 @@
 define x86_intrcc void @test_isr_x87(ptr byval(%struct.interrupt_frame) %frame) nounwind {
 ; CHECK-LABEL: test_isr_x87:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: movl %esp, %ebp
+; CHECK-NEXT: andl $-16, %esp
 ; CHECK-NEXT: cld
 ; CHECK-NEXT: fldt f80
 ; CHECK-NEXT: fld1
 ; CHECK-NEXT: faddp %st, %st(1)
 ; CHECK-NEXT: fstpt f80
+; CHECK-NEXT: movl %ebp, %esp
+; CHECK-NEXT: popl %ebp
 ; CHECK-NEXT: iretl
 ;
 ; CHECK0-LABEL: test_isr_x87:
 ; CHECK0: # %bb.0: # %entry
+; CHECK0-NEXT: pushl %ebp
+; CHECK0-NEXT: movl %esp, %ebp
+; CHECK0-NEXT: andl $-16, %esp
 ; CHECK0-NEXT: cld
 ; CHECK0-NEXT: fldt f80
 ; CHECK0-NEXT: fld1
 ; CHECK0-NEXT: faddp %st, %st(1)
 ; CHECK0-NEXT: fstpt f80
+; CHECK0-NEXT: movl %ebp, %esp
+; CHECK0-NEXT: popl %ebp
 ; CHECK0-NEXT: iretl
 entry:
   %ld = load x86_fp80, ptr @f80, align 4
@@ -135,8 +177,8 @@
   ret void
 }
 
-; Use a frame pointer to check the offsets. No return address, arguments start
-; at EBP+4.
+; Use the interrupt_frame pointer to check the offsets.
+; No return address, arguments start at EBP+4.
 define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %p) #0 {
 ; CHECK-LABEL: test_fp_1:
 ; CHECK: # %bb.0: # %entry
@@ -144,11 +186,13 @@
 ; CHECK-NEXT: movl %esp, %ebp
 ; CHECK-NEXT: pushl %ecx
 ; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: andl $-16, %esp
 ; CHECK-NEXT: cld
 ; CHECK-NEXT: leal 20(%ebp), %eax
 ; CHECK-NEXT: leal 4(%ebp), %ecx
 ; CHECK-NEXT: movl %ecx, sink_address
 ; CHECK-NEXT: movl %eax, sink_address
+; CHECK-NEXT: leal -8(%ebp), %esp
 ; CHECK-NEXT: popl %eax
 ; CHECK-NEXT: popl %ecx
 ; CHECK-NEXT: popl %ebp
@@ -160,12 +204,14 @@
 ; CHECK0-NEXT: movl %esp, %ebp
 ; CHECK0-NEXT: pushl %ecx
 ; CHECK0-NEXT: pushl %eax
+; CHECK0-NEXT: andl $-16, %esp
 ; CHECK0-NEXT: cld
 ; CHECK0-NEXT: leal 4(%ebp), %ecx
 ; CHECK0-NEXT: movl %ecx, %eax
 ; CHECK0-NEXT: addl $16, %eax
 ; CHECK0-NEXT: movl %ecx, sink_address
 ; CHECK0-NEXT: movl %eax, sink_address
+; CHECK0-NEXT: leal -8(%ebp), %esp
 ; CHECK0-NEXT: popl %eax
 ; CHECK0-NEXT: popl %ecx
 ; CHECK0-NEXT: popl %ebp
@@ -186,6 +232,7 @@
 ; CHECK-NEXT: pushl %edx
 ; CHECK-NEXT: pushl %ecx
 ; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: andl $-16, %esp
 ; CHECK-NEXT: cld
 ; CHECK-NEXT: movl 4(%ebp), %eax
 ; CHECK-NEXT: leal 24(%ebp), %ecx
@@ -193,6 +240,7 @@
 ; CHECK-NEXT: movl %edx, sink_address
 ; CHECK-NEXT: movl %ecx, sink_address
 ; CHECK-NEXT: movl %eax, sink_i32
+; CHECK-NEXT: leal -12(%ebp), %esp
 ; CHECK-NEXT: popl %eax
 ; CHECK-NEXT: popl %ecx
 ; CHECK-NEXT: popl %edx
@@ -207,6 +255,7 @@
 ; CHECK0-NEXT: pushl %edx
 ; CHECK0-NEXT: pushl %ecx
 ; CHECK0-NEXT: pushl %eax
+; CHECK0-NEXT: andl $-16, %esp
 ; CHECK0-NEXT: cld
 ; CHECK0-NEXT: movl 4(%ebp), %eax
 ; CHECK0-NEXT: leal 8(%ebp), %edx
@@ -215,6 +264,7 @@
 ; CHECK0-NEXT: movl %edx, sink_address
 ; CHECK0-NEXT: movl %ecx, sink_address
 ; CHECK0-NEXT: movl %eax, sink_i32
+; CHECK0-NEXT: leal -12(%ebp), %esp
 ; CHECK0-NEXT: popl %eax
 ; CHECK0-NEXT: popl %ecx
 ; CHECK0-NEXT: popl %edx
@@ -236,9 +286,11 @@
 ; CHECK-NEXT: pushl %ebp
 ; CHECK-NEXT: movl %esp, %ebp
 ; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: andl $-16, %esp
 ; CHECK-NEXT: cld
 ; CHECK-NEXT: leal 4(%ebp), %eax
 ; CHECK-NEXT: movl %eax, sink_address
+; CHECK-NEXT: leal -4(%ebp), %esp
 ; CHECK-NEXT: popl %eax
 ; CHECK-NEXT: popl %ebp
 ; CHECK-NEXT: addl $4, %esp
@@ -249,10 +301,12 @@
 ; CHECK0-NEXT: pushl %ebp
 ; CHECK0-NEXT: movl %esp, %ebp
 ; CHECK0-NEXT: pushl %eax
+; CHECK0-NEXT: andl $-16, %esp
 ; CHECK0-NEXT: cld
 ; CHECK0-NEXT: movl 4(%ebp), %eax
 ; CHECK0-NEXT: leal 4(%ebp), %eax
 ; CHECK0-NEXT: movl %eax, sink_address
+; CHECK0-NEXT: leal -4(%ebp), %esp
 ; CHECK0-NEXT: popl %eax
 ; CHECK0-NEXT: popl %ebp
 ; CHECK0-NEXT: addl $4, %esp
@@ -264,4 +318,75 @@
   ret void
 }
 
+; Disabling dynamic realignment with the no-realign-stack attribute should work
+define x86_intrcc void @test_isr_no_realign(ptr byval(%struct.interrupt_frame) %frame) #1 {
+; CHECK-LABEL: test_isr_no_realign:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: cld
+; CHECK-NEXT: movl 12(%esp), %eax
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: popl %eax
+; CHECK-NEXT: iretl
+;
+; CHECK0-LABEL: test_isr_no_realign:
+; CHECK0: # %bb.0:
+; CHECK0-NEXT: pushl %eax
+; CHECK0-NEXT: cld
+; CHECK0-NEXT: leal 4(%esp), %eax
+; CHECK0-NEXT: movl 8(%eax), %eax
+; CHECK0-NEXT: #APP
+; CHECK0-NEXT: #NO_APP
+; CHECK0-NEXT: popl %eax
+; CHECK0-NEXT: iretl
+  %pflags = getelementptr inbounds %struct.interrupt_frame, ptr %frame, i32 0, i32 2
+  %flags = load i32, ptr %pflags, align 4
+  call void asm sideeffect "", "r"(i32 %flags)
+  ret void
+}
+
+; The stackrealign attribute should work, and the function's requested 32-byte
+; stack alignment should take precedence over the default 16-byte alignment
+; required by the calling convention.
+define x86_intrcc void @test_isr_realign(ptr byval(%struct.interrupt_frame) %frame, i32 %ecode) #2 {
+; CHECK-LABEL: test_isr_realign:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: movl %esp, %ebp
+; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: andl $-32, %esp
+; CHECK-NEXT: subl $32, %esp
+; CHECK-NEXT: cld
+; CHECK-NEXT: movl 4(%ebp), %eax
+; CHECK-NEXT: movl %eax, (%esp)
+; CHECK-NEXT: leal -4(%ebp), %esp
+; CHECK-NEXT: popl %eax
+; CHECK-NEXT: popl %ebp
+; CHECK-NEXT: addl $4, %esp
+; CHECK-NEXT: iretl
+;
+; CHECK0-LABEL: test_isr_realign:
+; CHECK0: # %bb.0:
+; CHECK0-NEXT: pushl %ebp
+; CHECK0-NEXT: movl %esp, %ebp
+; CHECK0-NEXT: pushl %eax
+; CHECK0-NEXT: andl $-32, %esp
+; CHECK0-NEXT: subl $32, %esp
+; CHECK0-NEXT: cld
+; CHECK0-NEXT: movl 4(%ebp), %eax
+; CHECK0-NEXT: movl %eax, (%esp)
+; CHECK0-NEXT: leal -4(%ebp), %esp
+; CHECK0-NEXT: popl %eax
+; CHECK0-NEXT: popl %ebp
+; CHECK0-NEXT: addl $4, %esp
+; CHECK0-NEXT: iretl
+  %ecode.stack = alloca i32, align 32
+  store i32 %ecode, ptr %ecode.stack
+  ret void
+}
+
 attributes #0 = { nounwind "frame-pointer"="all" }
+attributes #1 = { nounwind "no-realign-stack" }
+attributes #2 = { nounwind "stackrealign" }
diff --git a/llvm/test/CodeGen/X86/x86-interrupt_cc.ll b/llvm/test/CodeGen/X86/x86-interrupt_cc.ll
--- a/llvm/test/CodeGen/X86/x86-interrupt_cc.ll
+++ b/llvm/test/CodeGen/X86/x86-interrupt_cc.ll
@@ -506,50 +506,52 @@
 ;
 ; CHECK32-KNL-LABEL: foo:
 ; CHECK32-KNL: ## %bb.0:
-; CHECK32-KNL-NEXT: pushl %edx ## encoding: [0x52]
+; CHECK32-KNL-NEXT: pushl %ebp ## encoding: [0x55]
 ; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 8
+; CHECK32-KNL-NEXT: .cfi_offset %ebp, -8
+; CHECK32-KNL-NEXT: movl %esp, %ebp ## encoding: [0x89,0xe5]
+; CHECK32-KNL-NEXT: .cfi_def_cfa_register %ebp
+; CHECK32-KNL-NEXT: pushl %edx ## encoding: [0x52]
 ;
CHECK32-KNL-NEXT: pushl %ecx ## encoding: [0x51] -; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 12 ; CHECK32-KNL-NEXT: pushl %eax ## encoding: [0x50] -; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 16 +; CHECK32-KNL-NEXT: andl $-16, %esp ## encoding: [0x83,0xe4,0xf0] ; CHECK32-KNL-NEXT: subl $560, %esp ## encoding: [0x81,0xec,0x30,0x02,0x00,0x00] ; CHECK32-KNL-NEXT: ## imm = 0x230 ; CHECK32-KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xbc,0x24,0x2e,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x7d,0xf2] ; CHECK32-KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xb4,0x24,0x2c,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x75,0xf0] ; CHECK32-KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xac,0x24,0x2a,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x6d,0xee] ; CHECK32-KNL-NEXT: kmovw %k4, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0xa4,0x24,0x28,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x65,0xec] ; CHECK32-KNL-NEXT: kmovw %k3, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x9c,0x24,0x26,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x5d,0xea] ; CHECK32-KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x94,0x24,0x24,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x55,0xe8] ; CHECK32-KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x8c,0x24,0x22,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x4d,0xe6] ; CHECK32-KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x84,0x24,0x20,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x91,0x45,0xe4] ; CHECK32-KNL-NEXT: vmovups %zmm7, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbd,0x88,0xff,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups %zmm6, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xb5,0x48,0xff,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups %zmm5, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xad,0x08,0xff,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups %zmm4, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xa5,0xc8,0xfe,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups %zmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x9d,0x88,0xfe,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups %zmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x95,0x48,0xfe,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups %zmm1, 
{{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01] -; CHECK32-KNL-NEXT: vmovups %zmm0, (%esp) ## 64-byte Spill -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24] -; CHECK32-KNL-NEXT: .cfi_def_cfa_offset 576 -; CHECK32-KNL-NEXT: .cfi_offset %eax, -16 -; CHECK32-KNL-NEXT: .cfi_offset %ecx, -12 -; CHECK32-KNL-NEXT: .cfi_offset %edx, -8 +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x8d,0x08,0xfe,0xff,0xff] +; CHECK32-KNL-NEXT: vmovups %zmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x85,0xc8,0xfd,0xff,0xff] +; CHECK32-KNL-NEXT: .cfi_offset %eax, -20 +; CHECK32-KNL-NEXT: .cfi_offset %ecx, -16 +; CHECK32-KNL-NEXT: .cfi_offset %edx, -12 ; CHECK32-KNL-NEXT: .cfi_offset %xmm0, -576 ; CHECK32-KNL-NEXT: .cfi_offset %xmm1, -512 ; CHECK32-KNL-NEXT: .cfi_offset %xmm2, -448 @@ -558,102 +560,104 @@ ; CHECK32-KNL-NEXT: .cfi_offset %xmm5, -256 ; CHECK32-KNL-NEXT: .cfi_offset %xmm6, -192 ; CHECK32-KNL-NEXT: .cfi_offset %xmm7, -128 -; CHECK32-KNL-NEXT: .cfi_offset %k0, -32 -; CHECK32-KNL-NEXT: .cfi_offset %k1, -30 -; CHECK32-KNL-NEXT: .cfi_offset %k2, -28 -; CHECK32-KNL-NEXT: .cfi_offset %k3, -26 -; CHECK32-KNL-NEXT: .cfi_offset %k4, -24 -; CHECK32-KNL-NEXT: .cfi_offset %k5, -22 -; CHECK32-KNL-NEXT: .cfi_offset %k6, -20 -; CHECK32-KNL-NEXT: .cfi_offset %k7, -18 +; CHECK32-KNL-NEXT: .cfi_offset %k0, -36 +; CHECK32-KNL-NEXT: .cfi_offset %k1, -34 +; CHECK32-KNL-NEXT: .cfi_offset %k2, -32 +; CHECK32-KNL-NEXT: .cfi_offset %k3, -30 +; CHECK32-KNL-NEXT: .cfi_offset %k4, -28 +; CHECK32-KNL-NEXT: .cfi_offset %k5, -26 +; CHECK32-KNL-NEXT: .cfi_offset %k6, -24 +; CHECK32-KNL-NEXT: .cfi_offset %k7, -22 ; CHECK32-KNL-NEXT: cld ## encoding: [0xfc] ; CHECK32-KNL-NEXT: calll _bar ## encoding: [0xe8,A,A,A,A] ; CHECK32-KNL-NEXT: ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4 -; CHECK32-KNL-NEXT: vmovups (%esp), %zmm0 ## 64-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24] +; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm0 ## 64-byte Reload +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x85,0xc8,0xfd,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm1 ## 64-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x8d,0x08,0xfe,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm2 ## 64-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x95,0x48,0xfe,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm3 ## 64-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x9d,0x88,0xfe,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm4 ## 64-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xa5,0xc8,0xfe,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm5 ## 64-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xad,0x08,0xff,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm6 ## 64-byte Reload -; CHECK32-KNL-NEXT: ## encoding: 
[0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xb5,0x48,0xff,0xff,0xff] ; CHECK32-KNL-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm7 ## 64-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07] +; CHECK32-KNL-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xbd,0x88,0xff,0xff,0xff] ; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 ## 2-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x84,0x24,0x20,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x45,0xe4] ; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x8c,0x24,0x22,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x4d,0xe6] ; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 2-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x94,0x24,0x24,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x55,0xe8] ; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k3 ## 2-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x9c,0x24,0x26,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x5d,0xea] ; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k4 ## 2-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xa4,0x24,0x28,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x65,0xec] ; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 2-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xac,0x24,0x2a,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x6d,0xee] ; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xb4,0x24,0x2c,0x02,0x00,0x00] +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x75,0xf0] ; CHECK32-KNL-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload -; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0xbc,0x24,0x2e,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: addl $560, %esp ## encoding: [0x81,0xc4,0x30,0x02,0x00,0x00] -; CHECK32-KNL-NEXT: ## imm = 0x230 +; CHECK32-KNL-NEXT: ## encoding: [0xc5,0xf8,0x90,0x7d,0xf2] +; CHECK32-KNL-NEXT: leal -12(%ebp), %esp ## encoding: [0x8d,0x65,0xf4] ; CHECK32-KNL-NEXT: popl %eax ## encoding: [0x58] ; CHECK32-KNL-NEXT: popl %ecx ## encoding: [0x59] ; CHECK32-KNL-NEXT: popl %edx ## encoding: [0x5a] +; CHECK32-KNL-NEXT: popl %ebp ## encoding: [0x5d] ; CHECK32-KNL-NEXT: iretl ## encoding: [0xcf] ; ; CHECK32-SKX-LABEL: foo: ; CHECK32-SKX: ## %bb.0: -; CHECK32-SKX-NEXT: pushl %edx ## encoding: [0x52] +; CHECK32-SKX-NEXT: pushl %ebp ## encoding: [0x55] ; CHECK32-SKX-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-SKX-NEXT: .cfi_offset %ebp, -8 +; CHECK32-SKX-NEXT: movl %esp, %ebp ## encoding: [0x89,0xe5] +; CHECK32-SKX-NEXT: .cfi_def_cfa_register %ebp +; CHECK32-SKX-NEXT: pushl %edx ## encoding: [0x52] ; CHECK32-SKX-NEXT: pushl %ecx ## encoding: [0x51] -; CHECK32-SKX-NEXT: .cfi_def_cfa_offset 12 ; CHECK32-SKX-NEXT: pushl %eax ## encoding: [0x50] -; CHECK32-SKX-NEXT: .cfi_def_cfa_offset 16 +; CHECK32-SKX-NEXT: andl $-16, %esp ## encoding: [0x83,0xe4,0xf0] ; CHECK32-SKX-NEXT: subl $624, %esp ## encoding: [0x81,0xec,0x70,0x02,0x00,0x00] ; CHECK32-SKX-NEXT: ## imm = 0x270 ; CHECK32-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x7d,0xe8] ; CHECK32-SKX-NEXT: kmovq 
%k6, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xb4,0x24,0x60,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x75,0xe0] ; CHECK32-SKX-NEXT: kmovq %k5, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xac,0x24,0x58,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x6d,0xd8] ; CHECK32-SKX-NEXT: kmovq %k4, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0xa4,0x24,0x50,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x65,0xd0] ; CHECK32-SKX-NEXT: kmovq %k3, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x9c,0x24,0x48,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x5d,0xc8] ; CHECK32-SKX-NEXT: kmovq %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x94,0x24,0x40,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x55,0xc0] ; CHECK32-SKX-NEXT: kmovq %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x8c,0x24,0x38,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x4d,0xb8] ; CHECK32-SKX-NEXT: kmovq %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 8-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x45,0xb0] ; CHECK32-SKX-NEXT: vmovups %zmm7, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbd,0x48,0xff,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups %zmm6, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xb5,0x08,0xff,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups %zmm5, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xad,0xc8,0xfe,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups %zmm4, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xa5,0x88,0xfe,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups %zmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x9d,0x48,0xfe,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups %zmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x95,0x08,0xfe,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01] -; CHECK32-SKX-NEXT: vmovups %zmm0, (%esp) ## 64-byte Spill -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24] -; CHECK32-SKX-NEXT: .cfi_def_cfa_offset 640 -; CHECK32-SKX-NEXT: .cfi_offset %eax, -16 -; CHECK32-SKX-NEXT: .cfi_offset %ecx, -12 -; CHECK32-SKX-NEXT: .cfi_offset %edx, -8 +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x8d,0xc8,0xfd,0xff,0xff] +; CHECK32-SKX-NEXT: vmovups %zmm0, 
{{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x85,0x88,0xfd,0xff,0xff] +; CHECK32-SKX-NEXT: .cfi_offset %eax, -20 +; CHECK32-SKX-NEXT: .cfi_offset %ecx, -16 +; CHECK32-SKX-NEXT: .cfi_offset %edx, -12 ; CHECK32-SKX-NEXT: .cfi_offset %xmm0, -640 ; CHECK32-SKX-NEXT: .cfi_offset %xmm1, -576 ; CHECK32-SKX-NEXT: .cfi_offset %xmm2, -512 @@ -662,55 +666,55 @@ ; CHECK32-SKX-NEXT: .cfi_offset %xmm5, -320 ; CHECK32-SKX-NEXT: .cfi_offset %xmm6, -256 ; CHECK32-SKX-NEXT: .cfi_offset %xmm7, -192 -; CHECK32-SKX-NEXT: .cfi_offset %k0, -80 -; CHECK32-SKX-NEXT: .cfi_offset %k1, -72 -; CHECK32-SKX-NEXT: .cfi_offset %k2, -64 -; CHECK32-SKX-NEXT: .cfi_offset %k3, -56 -; CHECK32-SKX-NEXT: .cfi_offset %k4, -48 -; CHECK32-SKX-NEXT: .cfi_offset %k5, -40 -; CHECK32-SKX-NEXT: .cfi_offset %k6, -32 -; CHECK32-SKX-NEXT: .cfi_offset %k7, -24 +; CHECK32-SKX-NEXT: .cfi_offset %k0, -88 +; CHECK32-SKX-NEXT: .cfi_offset %k1, -80 +; CHECK32-SKX-NEXT: .cfi_offset %k2, -72 +; CHECK32-SKX-NEXT: .cfi_offset %k3, -64 +; CHECK32-SKX-NEXT: .cfi_offset %k4, -56 +; CHECK32-SKX-NEXT: .cfi_offset %k5, -48 +; CHECK32-SKX-NEXT: .cfi_offset %k6, -40 +; CHECK32-SKX-NEXT: .cfi_offset %k7, -32 ; CHECK32-SKX-NEXT: cld ## encoding: [0xfc] ; CHECK32-SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK32-SKX-NEXT: calll _bar ## encoding: [0xe8,A,A,A,A] ; CHECK32-SKX-NEXT: ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4 -; CHECK32-SKX-NEXT: vmovups (%esp), %zmm0 ## 64-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24] +; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm0 ## 64-byte Reload +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x85,0x88,0xfd,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm1 ## 64-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x8d,0xc8,0xfd,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm2 ## 64-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x95,0x08,0xfe,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm3 ## 64-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x9d,0x48,0xfe,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm4 ## 64-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xa5,0x88,0xfe,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm5 ## 64-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xad,0xc8,0xfe,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm6 ## 64-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xb5,0x08,0xff,0xff,0xff] ; CHECK32-SKX-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %zmm7 ## 64-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07] +; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xbd,0x48,0xff,0xff,0xff] ; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k0 ## 8-byte Reload -; CHECK32-SKX-NEXT: ## encoding: 
[0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x45,0xb0] ; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 8-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x8c,0x24,0x38,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x4d,0xb8] ; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 8-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x94,0x24,0x40,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x55,0xc0] ; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k3 ## 8-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x9c,0x24,0x48,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x5d,0xc8] ; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k4 ## 8-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xa4,0x24,0x50,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x65,0xd0] ; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k5 ## 8-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xac,0x24,0x58,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x6d,0xd8] ; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 8-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xb4,0x24,0x60,0x02,0x00,0x00] +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x75,0xe0] ; CHECK32-SKX-NEXT: kmovq {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 8-byte Reload -; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0xbc,0x24,0x68,0x02,0x00,0x00] -; CHECK32-SKX-NEXT: addl $624, %esp ## encoding: [0x81,0xc4,0x70,0x02,0x00,0x00] -; CHECK32-SKX-NEXT: ## imm = 0x270 +; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x7d,0xe8] +; CHECK32-SKX-NEXT: leal -12(%ebp), %esp ## encoding: [0x8d,0x65,0xf4] ; CHECK32-SKX-NEXT: popl %eax ## encoding: [0x58] ; CHECK32-SKX-NEXT: popl %ecx ## encoding: [0x59] ; CHECK32-SKX-NEXT: popl %edx ## encoding: [0x5a] +; CHECK32-SKX-NEXT: popl %ebp ## encoding: [0x5d] ; CHECK32-SKX-NEXT: iretl ## encoding: [0xcf] call void @bar() ret void