Index: llvm/include/llvm/CodeGen/SelectionDAG.h
===================================================================
--- llvm/include/llvm/CodeGen/SelectionDAG.h
+++ llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1595,6 +1595,12 @@
   void dump() const;
 
+  /// In most cases this function returns the ABI alignment for a given type,
+  /// except for illegal vector types where the alignment exceeds that of the
+  /// stack. In such cases we attempt to break the vector down to a legal type
+  /// and return the ABI alignment for that instead; \p Ty holds the type used.
+  Align getReducedABIAlign(EVT VT, Type *&Ty);
+
   /// Create a stack temporary based on the size in bytes and the alignment
   SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment);
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -38,6 +38,7 @@
 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -1998,11 +1999,58 @@
   return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
 }
 
+Align SelectionDAG::getReducedABIAlign(EVT VT, Type *&Ty) {
+  const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+  const Align StackAlign = TFI->getStackAlign();
+  const DataLayout &DL = getDataLayout();
+
+  Ty = VT.getTypeForEVT(*getContext());
+  Align ABIAlign = DL.getABITypeAlign(Ty);
+
+  // See if we can choose a smaller ABI alignment in cases where it's an
+  // illegal vector type that will get broken down.
+  if (ABIAlign > StackAlign && VT.isVector() && !TLI->isTypeLegal(VT)) {
+    EVT IntermediateVT;
+    MVT RegisterVT;
+    unsigned NumIntermediates;
+    TLI->getVectorTypeBreakdown(*getContext(), VT, IntermediateVT,
+                                NumIntermediates, RegisterVT);
+    Type *Ty2 = IntermediateVT.getTypeForEVT(*getContext());
+    Align ABIAlign2 = DL.getABITypeAlign(Ty2);
+    if (ABIAlign2 < ABIAlign) {
+      ABIAlign = ABIAlign2;
+      Ty = Ty2;
+    }
+  }
+
+  return ABIAlign;
+}
+
 SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
-  Type *Ty = VT.getTypeForEVT(*getContext());
-  Align StackAlign =
-      std::max(getDataLayout().getPrefTypeAlign(Ty), Align(minAlign));
-  return CreateStackTemporary(VT.getStoreSize(), StackAlign);
+  const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+  TypeSize Bytes = VT.getStoreSize();
+  Type *Ty;
+
+  // This will return a reduced ABI alignment for vector types when the
+  // type is illegal and the alignment exceeds that of the stack. This
+  // reduces the amount of stack required and also ensures we don't
+  // exceed the permitted alignment for scalable vectors.
+  const Align ABIAlign = getReducedABIAlign(VT, Ty);
+
+  // The preferred alignment will use the reduced vector type returned
+  // from above.
+  const Align PrefAlign = getDataLayout().getPrefTypeAlign(Ty);
+
+  Align Alignment;
+
+  // We only permit preferred alignments if they exceed the ABI
+  // requirement, but do not exceed the stack alignment.
+  if (PrefAlign > ABIAlign && PrefAlign <= TFI->getStackAlign())
+    Alignment = PrefAlign;
+  else
+    Alignment = ABIAlign;
+  Alignment = std::max(Alignment, Align(minAlign));
+  return CreateStackTemporary(Bytes, Alignment);
 }
 
 SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
Index: llvm/test/CodeGen/AArch64/build-one-lane.ll
===================================================================
--- llvm/test/CodeGen/AArch64/build-one-lane.ll
+++ llvm/test/CodeGen/AArch64/build-one-lane.ll
@@ -270,3 +270,14 @@
 ; CHECK: mov v[[R]].d[1], v{{[0-9]+}}.d[0]
 ; CHECK: str q[[R]], [x{{[0-9]+}}]
 }
+
+; In this test the illegal type has an ABI alignment greater than the
+; stack alignment, which gets reduced to the alignment of a broken-down
+; legal type.
+define <32 x i8> @test_lanex_32xi8(<32 x i8> %a, i32 %x) {
+; CHECK-LABEL: test_lanex_32xi8
+; CHECK: stp q0, q1, [sp, #-32]!
+; CHECK: ldp q0, q1, [sp], #32
+  %b = insertelement <32 x i8> %a, i8 30, i32 %x
+  ret <32 x i8> %b
+}
Index: llvm/test/CodeGen/AMDGPU/scratch-simple.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/scratch-simple.ll
+++ llvm/test/CodeGen/AMDGPU/scratch-simple.ll
@@ -26,8 +26,8 @@
 ; GCN-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]]
 ; GCN-NOT: s_mov_b32 s0
 
-; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], 0x200, [[CLAMP_IDX]]
-; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], 0x400, [[CLAMP_IDX]]
+; GCN-DAG: v_add{{_|_nc_}}{{i|u}}32_e32 [[HI_OFF:v[0-9]+]],{{.*}} 0x280, [[CLAMP_IDX]]
+; GCN-DAG: v_add{{_|_nc_}}{{i|u}}32_e32 [[LO_OFF:v[0-9]+]],{{.*}} {{v2|0x80}}, [[CLAMP_IDX]]
 
 ; GCN: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen
 ; GCN: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen
Index: llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
+++ llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
@@ -25,7 +25,7 @@
 ; GCN: buffer_store_dword {{v[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Spill
 ; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Reload
 ; GCN: NumVgprs: 256
-; GCN: ScratchSize: 1536
+; GCN: ScratchSize: 768
 
 define amdgpu_vs void @main([9 x <4 x i32>] addrspace(4)* inreg %arg, [17 x <4 x i32>] addrspace(4)* inreg %arg1, [17 x <4 x i32>] addrspace(4)* inreg %arg2, [34 x <8 x i32>] addrspace(4)* inreg %arg3, [16 x <4 x i32>] addrspace(4)* inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
 bb:
Index: llvm/test/CodeGen/ARM/combine-vmovdrr.ll
===================================================================
--- llvm/test/CodeGen/ARM/combine-vmovdrr.ll
+++ llvm/test/CodeGen/ARM/combine-vmovdrr.ll
@@ -29,7 +29,7 @@
 
 ; Check that we do not perform the transformation for dynamic index.
; CHECK-LABEL: dynamicIndex: ; CHECK-NOT: mul -; CHECK: pop +; CHECK: add sp, #16 define void @dynamicIndex(<2 x i64>* %addr, <8 x i8>* %addr2, i32 %index) { %shuffle.i.bc.i309 = load <2 x i64>, <2 x i64>* %addr %vtbl2.i25.i = load <8 x i8>, <8 x i8>* %addr2 Index: llvm/test/CodeGen/ARM/vdup.ll =================================================================== --- llvm/test/CodeGen/ARM/vdup.ll +++ llvm/test/CodeGen/ARM/vdup.ll @@ -545,23 +545,17 @@ define <8 x i8> @check_i8_varidx(<16 x i8> %v, i32 %idx) { ; CHECK-LABEL: check_i8_varidx: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r11} -; CHECK-NEXT: push {r11} -; CHECK-NEXT: .setfp r11, sp -; CHECK-NEXT: mov r11, sp -; CHECK-NEXT: .pad #28 -; CHECK-NEXT: sub sp, sp, #28 -; CHECK-NEXT: bic sp, sp, #15 -; CHECK-NEXT: ldr r12, [r11, #4] +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: ldr r12, [sp, #16] ; CHECK-NEXT: vmov d17, r2, r3 ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: mov r1, sp ; CHECK-NEXT: and r0, r12, #15 -; CHECK-NEXT: vst1.64 {d16, d17}, [r1:128], r0 +; CHECK-NEXT: vst1.64 {d16, d17}, [r1], r0 ; CHECK-NEXT: vld1.8 {d16[]}, [r1] ; CHECK-NEXT: vmov r0, r1, d16 -; CHECK-NEXT: mov sp, r11 -; CHECK-NEXT: pop {r11} +; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: mov pc, lr %x = extractelement <16 x i8> %v, i32 %idx %1 = insertelement <8 x i8> undef, i8 %x, i32 0 Index: llvm/test/CodeGen/ARM/vstlane.ll =================================================================== --- llvm/test/CodeGen/ARM/vstlane.ll +++ llvm/test/CodeGen/ARM/vstlane.ll @@ -529,23 +529,21 @@ ; CHECK-LABEL: variable_insertelement: ; CHECK: @ %bb.0: ; CHECK-NEXT: push {r11, lr} -; CHECK-NEXT: mov r11, sp -; CHECK-NEXT: sub sp, sp, #24 -; CHECK-NEXT: bic sp, sp, #15 -; CHECK-NEXT: ldr lr, [r11, #12] +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: ldr lr, [sp, #28] ; CHECK-NEXT: vmov d17, r2, r3 ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: mov r1, sp ; CHECK-NEXT: and r0, lr, #7 ; CHECK-NEXT: mov r2, r1 -; CHECK-NEXT: ldrh r12, [r11, #8] +; CHECK-NEXT: ldrh r12, [sp, #24] ; CHECK-NEXT: lsl r0, r0, #1 -; CHECK-NEXT: vst1.64 {d16, d17}, [r2:128], r0 +; CHECK-NEXT: vst1.64 {d16, d17}, [r2], r0 ; CHECK-NEXT: strh r12, [r2] -; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128] +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: vmov r2, r3, d17 -; CHECK-NEXT: mov sp, r11 +; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: pop {r11, lr} ; CHECK-NEXT: mov pc, lr %r = insertelement <8 x i16> %a, i16 %b, i32 %c Index: llvm/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll =================================================================== --- llvm/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll +++ llvm/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll @@ -2,7 +2,7 @@ define void @bar(<4 x i32>* %p, i32 %lane, <4 x i32> %phitmp) nounwind { ; CHECK: lsls r[[ADDR:[0-9]+]], r[[ADDR]], #2 -; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[SOURCE:[0-9]+]]:128], r[[ADDR]] +; CHECK: vst1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[SOURCE:[0-9]+]]], r[[ADDR]] ; CHECK: vld1.32 {[[DREG:d[0-9]+]][], [[DREG2:d[0-9]+]][]}, [r[[SOURCE]]:32] ; CHECK: vst1.32 {[[DREG]], [[DREG2]]}, [r0] %val = extractelement <4 x i32> %phitmp, i32 %lane Index: llvm/test/CodeGen/Thumb2/mve-shuffle.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-shuffle.ll +++ llvm/test/CodeGen/Thumb2/mve-shuffle.ll @@ -527,21 +527,13 @@ define arm_aapcs_vfpcc <2 x double> @insert_f64(double %a) 
{ ; CHECK-LABEL: insert_f64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r6, r7, lr} -; CHECK-NEXT: push {r4, r6, r7, lr} -; CHECK-NEXT: .setfp r7, sp, #8 -; CHECK-NEXT: add r7, sp, #8 ; CHECK-NEXT: .pad #16 ; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: mov r4, sp -; CHECK-NEXT: bfc r4, #0, #4 -; CHECK-NEXT: mov sp, r4 -; CHECK-NEXT: sub.w r4, r7, #8 ; CHECK-NEXT: vstr d0, [sp] ; CHECK-NEXT: mov r0, sp ; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: mov sp, r4 -; CHECK-NEXT: pop {r4, r6, r7, pc} +; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: bx lr entry: %res = insertelement <2 x double> undef, double %a, i32 0 ret <2 x double> %res Index: llvm/test/CodeGen/X86/atomic-fp.ll =================================================================== --- llvm/test/CodeGen/X86/atomic-fp.ll +++ llvm/test/CodeGen/X86/atomic-fp.ll @@ -88,15 +88,15 @@ ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %ecx, (%esp) +; X86-NOSSE-NEXT: fldl (%esp) ; X86-NOSSE-NEXT: faddl 12(%ebp) ; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOSSE-NEXT: movl %ecx, (%esp) +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fistpll (%eax) ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp @@ -126,34 +126,26 @@ ; ; X86-SSE2-LABEL: fadd_64r: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pushl %ebp -; X86-SSE2-NEXT: movl %esp, %ebp -; X86-SSE2-NEXT: andl $-8, %esp ; X86-SSE2-NEXT: subl $8, %esp -; X86-SSE2-NEXT: movl 8(%ebp), %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: addsd 12(%ebp), %xmm0 +; X86-SSE2-NEXT: addsd {{[0-9]+}}(%esp), %xmm0 ; X86-SSE2-NEXT: movsd %xmm0, (%esp) ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: movlps %xmm0, (%eax) -; X86-SSE2-NEXT: movl %ebp, %esp -; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: addl $8, %esp ; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: fadd_64r: ; X86-AVX: # %bb.0: -; X86-AVX-NEXT: pushl %ebp -; X86-AVX-NEXT: movl %esp, %ebp -; X86-AVX-NEXT: andl $-8, %esp ; X86-AVX-NEXT: subl $8, %esp -; X86-AVX-NEXT: movl 8(%ebp), %eax +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vaddsd 12(%ebp), %xmm0, %xmm0 +; X86-AVX-NEXT: vaddsd {{[0-9]+}}(%esp), %xmm0, %xmm0 ; X86-AVX-NEXT: vmovsd %xmm0, (%esp) ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vmovlps %xmm0, (%eax) -; X86-AVX-NEXT: movl %ebp, %esp -; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: addl $8, %esp ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: fadd_64r: @@ -254,15 +246,15 @@ ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, (%esp) ; X86-NOSSE-NEXT: fld1 -; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: faddl (%esp) ; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; 
X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fistpll glob64 ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp @@ -291,32 +283,24 @@ ; ; X86-SSE2-LABEL: fadd_64g: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pushl %ebp -; X86-SSE2-NEXT: movl %esp, %ebp -; X86-SSE2-NEXT: andl $-8, %esp ; X86-SSE2-NEXT: subl $8, %esp ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: addsd {{\.LCPI.*}}, %xmm0 ; X86-SSE2-NEXT: movsd %xmm0, (%esp) ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: movlps %xmm0, glob64 -; X86-SSE2-NEXT: movl %ebp, %esp -; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: addl $8, %esp ; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: fadd_64g: ; X86-AVX: # %bb.0: -; X86-AVX-NEXT: pushl %ebp -; X86-AVX-NEXT: movl %esp, %ebp -; X86-AVX-NEXT: andl $-8, %esp ; X86-AVX-NEXT: subl $8, %esp ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vaddsd {{\.LCPI.*}}, %xmm0, %xmm0 ; X86-AVX-NEXT: vmovsd %xmm0, (%esp) ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vmovlps %xmm0, glob64 -; X86-AVX-NEXT: movl %ebp, %esp -; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: addl $8, %esp ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: fadd_64g: @@ -417,15 +401,15 @@ ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, (%esp) ; X86-NOSSE-NEXT: fld1 -; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: faddl (%esp) ; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fistpll -559038737 ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp @@ -454,32 +438,24 @@ ; ; X86-SSE2-LABEL: fadd_64imm: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pushl %ebp -; X86-SSE2-NEXT: movl %esp, %ebp -; X86-SSE2-NEXT: andl $-8, %esp ; X86-SSE2-NEXT: subl $8, %esp ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: addsd {{\.LCPI.*}}, %xmm0 ; X86-SSE2-NEXT: movsd %xmm0, (%esp) ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: movlps %xmm0, -559038737 -; X86-SSE2-NEXT: movl %ebp, %esp -; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: addl $8, %esp ; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: fadd_64imm: ; X86-AVX: # %bb.0: -; X86-AVX-NEXT: pushl %ebp -; X86-AVX-NEXT: movl %esp, %ebp -; X86-AVX-NEXT: andl $-8, %esp ; X86-AVX-NEXT: subl $8, %esp ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vaddsd {{\.LCPI.*}}, %xmm0, %xmm0 ; X86-AVX-NEXT: vmovsd %xmm0, (%esp) ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vmovlps %xmm0, -559038737 -; X86-AVX-NEXT: movl %ebp, %esp -; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: addl $8, %esp ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: fadd_64imm: @@ -586,15 +562,15 @@ ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %eax, (%esp) ; X86-NOSSE-NEXT: fld1 -; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: faddl (%esp) ; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; 
X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp @@ -629,9 +605,9 @@ ; X86-SSE2-NEXT: subl $16, %esp ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: addsd {{\.LCPI.*}}, %xmm0 -; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movlps %xmm0, (%esp) ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl @@ -644,9 +620,9 @@ ; X86-AVX-NEXT: subl $16, %esp ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vaddsd {{\.LCPI.*}}, %xmm0, %xmm0 -; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; X86-AVX-NEXT: vmovlps %xmm0, (%esp) ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp ; X86-AVX-NEXT: retl @@ -689,15 +665,15 @@ ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %edx, (%esp) +; X86-NOSSE-NEXT: fldl (%esp) ; X86-NOSSE-NEXT: faddl 12(%ebp) ; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOSSE-NEXT: movl %edx, (%esp) +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8) ; X86-NOSSE-NEXT: leal -4(%ebp), %esp ; X86-NOSSE-NEXT: popl %esi @@ -729,36 +705,28 @@ ; ; X86-SSE2-LABEL: fadd_array: ; X86-SSE2: # %bb.0: # %bb -; X86-SSE2-NEXT: pushl %ebp -; X86-SSE2-NEXT: movl %esp, %ebp -; X86-SSE2-NEXT: andl $-8, %esp ; X86-SSE2-NEXT: subl $8, %esp -; X86-SSE2-NEXT: movl 20(%ebp), %eax -; X86-SSE2-NEXT: movl 8(%ebp), %ecx +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: addsd 12(%ebp), %xmm0 +; X86-SSE2-NEXT: addsd {{[0-9]+}}(%esp), %xmm0 ; X86-SSE2-NEXT: movsd %xmm0, (%esp) ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8) -; X86-SSE2-NEXT: movl %ebp, %esp -; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: addl $8, %esp ; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: fadd_array: ; X86-AVX: # %bb.0: # %bb -; X86-AVX-NEXT: pushl %ebp -; X86-AVX-NEXT: movl %esp, %ebp -; X86-AVX-NEXT: andl $-8, %esp ; X86-AVX-NEXT: subl $8, %esp -; X86-AVX-NEXT: movl 20(%ebp), %eax -; X86-AVX-NEXT: movl 8(%ebp), %ecx +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vaddsd 12(%ebp), %xmm0, %xmm0 +; X86-AVX-NEXT: vaddsd {{[0-9]+}}(%esp), %xmm0, %xmm0 ; X86-AVX-NEXT: vmovsd %xmm0, (%esp) ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8) -; X86-AVX-NEXT: movl %ebp, %esp -; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: addl $8, %esp ; 
X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: fadd_array: Index: llvm/test/CodeGen/X86/atomic-load-store-wide.ll =================================================================== --- llvm/test/CodeGen/X86/atomic-load-store-wide.ll +++ llvm/test/CodeGen/X86/atomic-load-store-wide.ll @@ -16,24 +16,18 @@ ; ; NOSSE-LABEL: test1: ; NOSSE: # %bb.0: -; NOSSE-NEXT: pushl %ebp -; NOSSE-NEXT: .cfi_def_cfa_offset 8 -; NOSSE-NEXT: .cfi_offset %ebp, -8 -; NOSSE-NEXT: movl %esp, %ebp -; NOSSE-NEXT: .cfi_def_cfa_register %ebp -; NOSSE-NEXT: andl $-8, %esp ; NOSSE-NEXT: subl $8, %esp -; NOSSE-NEXT: movl 8(%ebp), %eax -; NOSSE-NEXT: movl 12(%ebp), %ecx -; NOSSE-NEXT: movl 16(%ebp), %edx +; NOSSE-NEXT: .cfi_def_cfa_offset 12 +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) ; NOSSE-NEXT: movl %ecx, (%esp) ; NOSSE-NEXT: fildll (%esp) ; NOSSE-NEXT: fistpll (%eax) ; NOSSE-NEXT: lock orl $0, (%esp) -; NOSSE-NEXT: movl %ebp, %esp -; NOSSE-NEXT: popl %ebp -; NOSSE-NEXT: .cfi_def_cfa %esp, 4 +; NOSSE-NEXT: addl $8, %esp +; NOSSE-NEXT: .cfi_def_cfa_offset 4 ; NOSSE-NEXT: retl store atomic i64 %val1, i64* %ptr seq_cst, align 8 ret void @@ -50,21 +44,15 @@ ; ; NOSSE-LABEL: test2: ; NOSSE: # %bb.0: -; NOSSE-NEXT: pushl %ebp -; NOSSE-NEXT: .cfi_def_cfa_offset 8 -; NOSSE-NEXT: .cfi_offset %ebp, -8 -; NOSSE-NEXT: movl %esp, %ebp -; NOSSE-NEXT: .cfi_def_cfa_register %ebp -; NOSSE-NEXT: andl $-8, %esp ; NOSSE-NEXT: subl $8, %esp -; NOSSE-NEXT: movl 8(%ebp), %eax +; NOSSE-NEXT: .cfi_def_cfa_offset 12 +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; NOSSE-NEXT: fildll (%eax) ; NOSSE-NEXT: fistpll (%esp) ; NOSSE-NEXT: movl (%esp), %eax ; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; NOSSE-NEXT: movl %ebp, %esp -; NOSSE-NEXT: popl %ebp -; NOSSE-NEXT: .cfi_def_cfa %esp, 4 +; NOSSE-NEXT: addl $8, %esp +; NOSSE-NEXT: .cfi_def_cfa_offset 4 ; NOSSE-NEXT: retl %val = load atomic i64, i64* %ptr seq_cst, align 8 ret i64 %val @@ -106,21 +94,15 @@ ; ; NOSSE-LABEL: test4: ; NOSSE: # %bb.0: -; NOSSE-NEXT: pushl %ebp -; NOSSE-NEXT: .cfi_def_cfa_offset 8 -; NOSSE-NEXT: .cfi_offset %ebp, -8 -; NOSSE-NEXT: movl %esp, %ebp -; NOSSE-NEXT: .cfi_def_cfa_register %ebp -; NOSSE-NEXT: andl $-8, %esp ; NOSSE-NEXT: subl $8, %esp -; NOSSE-NEXT: movl 8(%ebp), %eax +; NOSSE-NEXT: .cfi_def_cfa_offset 12 +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; NOSSE-NEXT: fildll (%eax) ; NOSSE-NEXT: fistpll (%esp) ; NOSSE-NEXT: movl (%esp), %eax ; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; NOSSE-NEXT: movl %ebp, %esp -; NOSSE-NEXT: popl %ebp -; NOSSE-NEXT: .cfi_def_cfa %esp, 4 +; NOSSE-NEXT: addl $8, %esp +; NOSSE-NEXT: .cfi_def_cfa_offset 4 ; NOSSE-NEXT: retl %val = load atomic volatile i64, i64* %ptr seq_cst, align 8 ret i64 %val Index: llvm/test/CodeGen/X86/atomic-mi.ll =================================================================== --- llvm/test/CodeGen/X86/atomic-mi.ll +++ llvm/test/CodeGen/X86/atomic-mi.ll @@ -84,21 +84,15 @@ ; ; X32-LABEL: store_atomic_imm_64: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $8, %esp -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl $0, {{[0-9]+}}(%esp) ; X32-NEXT: movl $42, (%esp) ; X32-NEXT: fildll (%esp) ; X32-NEXT: fistpll (%eax) -; X32-NEXT: movl %ebp, %esp -; 
X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: addl $8, %esp +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; These are implemented with a CAS loop on 32 bit architectures, and thus ; cannot be optimized in the same way as the others. @@ -117,21 +111,15 @@ ; ; X32-LABEL: store_atomic_imm_64_big: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $8, %esp -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl $23, {{[0-9]+}}(%esp) ; X32-NEXT: movl $1215752192, (%esp) # imm = 0x4876E800 ; X32-NEXT: fildll (%esp) ; X32-NEXT: fistpll (%eax) -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: addl $8, %esp +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl store atomic i64 100000000000, i64* %p monotonic, align 8 ret void @@ -319,14 +307,9 @@ ; ; X32-LABEL: add_64i: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $16, %esp -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: .cfi_def_cfa_offset 20 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: fildll (%eax) ; X32-NEXT: fistpll {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -337,9 +320,8 @@ ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NEXT: fildll (%esp) ; X32-NEXT: fistpll (%eax) -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: addl $16, %esp +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'addq'. %1 = load atomic i64, i64* %p acquire, align 8 @@ -356,27 +338,21 @@ ; ; X32-LABEL: add_64r: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $16, %esp -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: .cfi_def_cfa_offset 20 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: fildll (%eax) ; X32-NEXT: fistpll {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: addl 12(%ebp), %ecx -; X32-NEXT: adcl 16(%ebp), %edx +; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl %ecx, (%esp) ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NEXT: fildll (%esp) ; X32-NEXT: fistpll (%eax) -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: addl $16, %esp +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'addq'. 
%1 = load atomic i64, i64* %p acquire, align 8 @@ -539,27 +515,21 @@ ; ; X32-LABEL: sub_64r: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $16, %esp -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: .cfi_def_cfa_offset 20 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: fildll (%eax) ; X32-NEXT: fistpll {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: subl 12(%ebp), %ecx -; X32-NEXT: sbbl 16(%ebp), %edx +; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: sbbl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl %ecx, (%esp) ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NEXT: fildll (%esp) ; X32-NEXT: fistpll (%eax) -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: addl $16, %esp +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'subq'. %1 = load atomic i64, i64* %p acquire, align 8 @@ -704,14 +674,9 @@ ; ; X32-LABEL: and_64i: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $16, %esp -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: .cfi_def_cfa_offset 20 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: fildll (%eax) ; X32-NEXT: fistpll {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -720,9 +685,8 @@ ; X32-NEXT: movl $0, {{[0-9]+}}(%esp) ; X32-NEXT: fildll (%esp) ; X32-NEXT: fistpll (%eax) -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: addl $16, %esp +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'andq'. %1 = load atomic i64, i64* %p acquire, align 8 @@ -739,27 +703,21 @@ ; ; X32-LABEL: and_64r: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $16, %esp -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: .cfi_def_cfa_offset 20 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: fildll (%eax) ; X32-NEXT: fistpll {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: andl 16(%ebp), %edx -; X32-NEXT: andl 12(%ebp), %ecx +; X32-NEXT: andl {{[0-9]+}}(%esp), %edx +; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl %ecx, (%esp) ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NEXT: fildll (%esp) ; X32-NEXT: fistpll (%eax) -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: addl $16, %esp +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'andq'. 
%1 = load atomic i64, i64* %p acquire, align 8 @@ -925,14 +883,9 @@ ; ; X32-LABEL: or_64i: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $16, %esp -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: .cfi_def_cfa_offset 20 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: fildll (%eax) ; X32-NEXT: fistpll {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -942,9 +895,8 @@ ; X32-NEXT: movl %ecx, (%esp) ; X32-NEXT: fildll (%esp) ; X32-NEXT: fistpll (%eax) -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: addl $16, %esp +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'orq'. %1 = load atomic i64, i64* %p acquire, align 8 @@ -961,27 +913,21 @@ ; ; X32-LABEL: or_64r: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $16, %esp -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: .cfi_def_cfa_offset 20 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: fildll (%eax) ; X32-NEXT: fistpll {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: orl 16(%ebp), %edx -; X32-NEXT: orl 12(%ebp), %ecx +; X32-NEXT: orl {{[0-9]+}}(%esp), %edx +; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl %ecx, (%esp) ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NEXT: fildll (%esp) ; X32-NEXT: fistpll (%eax) -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: addl $16, %esp +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'orq'. %1 = load atomic i64, i64* %p acquire, align 8 @@ -1147,14 +1093,9 @@ ; ; X32-LABEL: xor_64i: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $16, %esp -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: .cfi_def_cfa_offset 20 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: fildll (%eax) ; X32-NEXT: fistpll {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -1164,9 +1105,8 @@ ; X32-NEXT: movl %ecx, (%esp) ; X32-NEXT: fildll (%esp) ; X32-NEXT: fistpll (%eax) -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: addl $16, %esp +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'xorq'. 
%1 = load atomic i64, i64* %p acquire, align 8 @@ -1183,27 +1123,21 @@ ; ; X32-LABEL: xor_64r: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $16, %esp -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: .cfi_def_cfa_offset 20 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: fildll (%eax) ; X32-NEXT: fistpll {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: xorl 16(%ebp), %edx -; X32-NEXT: xorl 12(%ebp), %ecx +; X32-NEXT: xorl {{[0-9]+}}(%esp), %edx +; X32-NEXT: xorl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl %ecx, (%esp) ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NEXT: fildll (%esp) ; X32-NEXT: fistpll (%eax) -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: addl $16, %esp +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'xorq'. %1 = load atomic i64, i64* %p acquire, align 8 @@ -1330,14 +1264,9 @@ ; ; X32-LABEL: inc_64: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $16, %esp -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: .cfi_def_cfa_offset 20 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: fildll (%eax) ; X32-NEXT: fistpll {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -1348,9 +1277,8 @@ ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NEXT: fildll (%esp) ; X32-NEXT: fistpll (%eax) -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: addl $16, %esp +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; ; SLOW_INC-LABEL: inc_64: @@ -1468,14 +1396,9 @@ ; ; X32-LABEL: dec_64: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $16, %esp -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: .cfi_def_cfa_offset 20 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: fildll (%eax) ; X32-NEXT: fistpll {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -1486,9 +1409,8 @@ ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NEXT: fildll (%esp) ; X32-NEXT: fistpll (%eax) -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: addl $16, %esp +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; ; SLOW_INC-LABEL: dec_64: @@ -1591,14 +1513,9 @@ ; ; X32-LABEL: not_64: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $16, %esp -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: .cfi_def_cfa_offset 20 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: fildll (%eax) ; X32-NEXT: fistpll {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -1609,9 +1526,8 @@ ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NEXT: fildll (%esp) ; X32-NEXT: fistpll (%eax) -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: addl $16, %esp +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'notq'. 
%1 = load atomic i64, i64* %p acquire, align 8 @@ -1706,14 +1622,9 @@ ; ; X32-LABEL: neg_64: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $16, %esp -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: .cfi_def_cfa_offset 20 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: fildll (%eax) ; X32-NEXT: fistpll {{[0-9]+}}(%esp) ; X32-NEXT: xorl %ecx, %ecx @@ -1724,9 +1635,8 @@ ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X32-NEXT: fildll (%esp) ; X32-NEXT: fistpll (%eax) -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: .cfi_def_cfa %esp, 4 +; X32-NEXT: addl $16, %esp +; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; We do neg check X86-32 as it canneg do 'negq'. %1 = load atomic i64, i64* %p acquire, align 8 Index: llvm/test/CodeGen/X86/atomic64.ll =================================================================== --- llvm/test/CodeGen/X86/atomic64.ll +++ llvm/test/CodeGen/X86/atomic64.ll @@ -851,12 +851,9 @@ ; ; I486-LABEL: atomic_fetch_swapf64: ; I486: # %bb.0: -; I486-NEXT: pushl %ebp -; I486-NEXT: movl %esp, %ebp ; I486-NEXT: pushl %esi -; I486-NEXT: andl $-8, %esp -; I486-NEXT: subl $40, %esp -; I486-NEXT: fldl 8(%ebp) +; I486-NEXT: subl $28, %esp +; I486-NEXT: fldl {{[0-9]+}}(%esp) ; I486-NEXT: leal fsc64, %eax ; I486-NEXT: fstpl {{[0-9]+}}(%esp) ; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -868,9 +865,8 @@ ; I486-NEXT: movl $fsc64, (%esi) ; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: calll __atomic_exchange_8 -; I486-NEXT: leal -4(%ebp), %esp +; I486-NEXT: addl $28, %esp ; I486-NEXT: popl %esi -; I486-NEXT: popl %ebp ; I486-NEXT: retl %t1 = atomicrmw xchg double* @fsc64, double %x acquire ret void Index: llvm/test/CodeGen/X86/avx512-insert-extract.ll =================================================================== --- llvm/test/CodeGen/X86/avx512-insert-extract.ll +++ llvm/test/CodeGen/X86/avx512-insert-extract.ll @@ -1768,8 +1768,8 @@ ; KNL-NEXT: .cfi_offset %rbp, -16 ; KNL-NEXT: movq %rsp, %rbp ; KNL-NEXT: .cfi_def_cfa_register %rbp -; KNL-NEXT: andq $-128, %rsp -; KNL-NEXT: subq $256, %rsp ## imm = 0x100 +; KNL-NEXT: andq $-64, %rsp +; KNL-NEXT: subq $192, %rsp ; KNL-NEXT: movl 744(%rbp), %eax ; KNL-NEXT: andl $127, %eax ; KNL-NEXT: vmovd %edi, %xmm0 @@ -1939,8 +1939,8 @@ ; SKX-NEXT: .cfi_offset %rbp, -16 ; SKX-NEXT: movq %rsp, %rbp ; SKX-NEXT: .cfi_def_cfa_register %rbp -; SKX-NEXT: andq $-128, %rsp -; SKX-NEXT: subq $256, %rsp ## imm = 0x100 +; SKX-NEXT: andq $-64, %rsp +; SKX-NEXT: subq $192, %rsp ; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SKX-NEXT: vpinsrb $1, 232(%rbp), %xmm0, %xmm0 ; SKX-NEXT: vpinsrb $2, 240(%rbp), %xmm0, %xmm0 @@ -2076,8 +2076,8 @@ ; KNL-NEXT: .cfi_offset %rbp, -16 ; KNL-NEXT: movq %rsp, %rbp ; KNL-NEXT: .cfi_def_cfa_register %rbp -; KNL-NEXT: andq $-128, %rsp -; KNL-NEXT: subq $256, %rsp ## imm = 0x100 +; KNL-NEXT: andq $-64, %rsp +; KNL-NEXT: subq $192, %rsp ; KNL-NEXT: ## kill: def $esi killed $esi def $rsi ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm3 @@ -2153,8 +2153,8 @@ ; SKX-NEXT: .cfi_offset %rbp, -16 ; SKX-NEXT: movq %rsp, %rbp ; SKX-NEXT: .cfi_def_cfa_register %rbp -; SKX-NEXT: andq $-128, %rsp -; SKX-NEXT: subq $256, %rsp ## imm = 0x100 +; SKX-NEXT: andq $-64, %rsp +; SKX-NEXT: subq $192, %rsp ; SKX-NEXT: ## kill: def $esi killed $esi def $rsi ; SKX-NEXT: vptestmb %zmm0, %zmm0, %k0 
; SKX-NEXT: vptestmb %zmm1, %zmm1, %k1 Index: llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll =================================================================== --- llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll +++ llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll @@ -7318,13 +7318,8 @@ define double @test_mm512_reduce_add_pd(<8 x double> %__W) { ; X86-LABEL: test_mm512_reduce_add_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 12 ; X86-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ; X86-NEXT: vextractf128 $1, %ymm0, %xmm1 @@ -7333,9 +7328,8 @@ ; X86-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: addl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; @@ -7365,13 +7359,8 @@ define double @test_mm512_reduce_mul_pd(<8 x double> %__W) { ; X86-LABEL: test_mm512_reduce_mul_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 12 ; X86-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; X86-NEXT: vmulpd %ymm1, %ymm0, %ymm0 ; X86-NEXT: vextractf128 $1, %ymm0, %xmm1 @@ -7380,9 +7369,8 @@ ; X86-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: addl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; @@ -7512,14 +7500,9 @@ define double @test_mm512_mask_reduce_add_pd(i8 zeroext %__M, <8 x double> %__W) { ; X86-LABEL: test_mm512_mask_reduce_add_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: movb 8(%ebp), %al +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: vextractf64x4 $1, %zmm0, %ymm1 @@ -7530,9 +7513,8 @@ ; X86-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: addl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; @@ -7566,14 +7548,9 @@ define double @test_mm512_mask_reduce_mul_pd(i8 zeroext %__M, <8 x double> %__W) { ; X86-LABEL: test_mm512_mask_reduce_mul_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: movb 8(%ebp), %al +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vbroadcastsd {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; X86-NEXT: vmovapd %zmm0, %zmm1 {%k1} @@ -7585,9 +7562,8 @@ ; 
X86-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: addl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; @@ -7816,13 +7792,8 @@ define double @test_mm512_reduce_max_pd(<8 x double> %__W) { ; X86-LABEL: test_mm512_reduce_max_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 12 ; X86-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; X86-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 ; X86-NEXT: vextractf128 $1, %ymm0, %xmm1 @@ -7831,9 +7802,8 @@ ; X86-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: addl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; @@ -7941,13 +7911,8 @@ define double @test_mm512_reduce_min_pd(<8 x double> %__W) { ; X86-LABEL: test_mm512_reduce_min_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 12 ; X86-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; X86-NEXT: vminpd %ymm1, %ymm0, %ymm0 ; X86-NEXT: vextractf128 $1, %ymm0, %xmm1 @@ -7956,9 +7921,8 @@ ; X86-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: addl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; @@ -8082,14 +8046,9 @@ define double @test_mm512_mask_reduce_max_pd(i8 zeroext %__M, <8 x double> %__W) { ; X86-LABEL: test_mm512_mask_reduce_max_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: movb 8(%ebp), %al +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vbroadcastsd {{.*#+}} zmm1 = [-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf] ; X86-NEXT: vmovapd %zmm0, %zmm1 {%k1} @@ -8101,9 +8060,8 @@ ; X86-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: addl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; @@ -8234,14 +8192,9 @@ define double @test_mm512_mask_reduce_min_pd(i8 zeroext %__M, <8 x double> %__W) { ; X86-LABEL: test_mm512_mask_reduce_min_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: movb 8(%ebp), %al +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vbroadcastsd {{.*#+}} zmm1 = [+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf] ; X86-NEXT: vmovapd %zmm0, %zmm1 {%k1} @@ 
-8253,9 +8206,8 @@ ; X86-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: addl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: vzeroupper ; X86-NEXT: retl ; Index: llvm/test/CodeGen/X86/cmov-fp.ll =================================================================== --- llvm/test/CodeGen/X86/cmov-fp.ll +++ llvm/test/CodeGen/X86/cmov-fp.ll @@ -8,12 +8,9 @@ define double @test1(i32 %a, i32 %b, double %x) nounwind { ; SSE-LABEL: test1: ; SSE: # %bb.0: -; SSE-NEXT: pushl %ebp -; SSE-NEXT: movl %esp, %ebp -; SSE-NEXT: andl $-8, %esp ; SSE-NEXT: subl $8, %esp -; SSE-NEXT: movl 8(%ebp), %eax -; SSE-NEXT: cmpl 12(%ebp), %eax +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: ja .LBB0_1 ; SSE-NEXT: # %bb.2: ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero @@ -23,8 +20,7 @@ ; SSE-NEXT: .LBB0_3: ; SSE-NEXT: movsd %xmm0, (%esp) ; SSE-NEXT: fldl (%esp) -; SSE-NEXT: movl %ebp, %esp -; SSE-NEXT: popl %ebp +; SSE-NEXT: addl $8, %esp ; SSE-NEXT: retl ; ; NOSSE2-LABEL: test1: @@ -71,12 +67,9 @@ define double @test2(i32 %a, i32 %b, double %x) nounwind { ; SSE-LABEL: test2: ; SSE: # %bb.0: -; SSE-NEXT: pushl %ebp -; SSE-NEXT: movl %esp, %ebp -; SSE-NEXT: andl $-8, %esp ; SSE-NEXT: subl $8, %esp -; SSE-NEXT: movl 8(%ebp), %eax -; SSE-NEXT: cmpl 12(%ebp), %eax +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: jae .LBB1_1 ; SSE-NEXT: # %bb.2: ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero @@ -86,8 +79,7 @@ ; SSE-NEXT: .LBB1_3: ; SSE-NEXT: movsd %xmm0, (%esp) ; SSE-NEXT: fldl (%esp) -; SSE-NEXT: movl %ebp, %esp -; SSE-NEXT: popl %ebp +; SSE-NEXT: addl $8, %esp ; SSE-NEXT: retl ; ; NOSSE2-LABEL: test2: @@ -134,12 +126,9 @@ define double @test3(i32 %a, i32 %b, double %x) nounwind { ; SSE-LABEL: test3: ; SSE: # %bb.0: -; SSE-NEXT: pushl %ebp -; SSE-NEXT: movl %esp, %ebp -; SSE-NEXT: andl $-8, %esp ; SSE-NEXT: subl $8, %esp -; SSE-NEXT: movl 8(%ebp), %eax -; SSE-NEXT: cmpl 12(%ebp), %eax +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: jb .LBB2_1 ; SSE-NEXT: # %bb.2: ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero @@ -149,8 +138,7 @@ ; SSE-NEXT: .LBB2_3: ; SSE-NEXT: movsd %xmm0, (%esp) ; SSE-NEXT: fldl (%esp) -; SSE-NEXT: movl %ebp, %esp -; SSE-NEXT: popl %ebp +; SSE-NEXT: addl $8, %esp ; SSE-NEXT: retl ; ; NOSSE2-LABEL: test3: @@ -197,12 +185,9 @@ define double @test4(i32 %a, i32 %b, double %x) nounwind { ; SSE-LABEL: test4: ; SSE: # %bb.0: -; SSE-NEXT: pushl %ebp -; SSE-NEXT: movl %esp, %ebp -; SSE-NEXT: andl $-8, %esp ; SSE-NEXT: subl $8, %esp -; SSE-NEXT: movl 8(%ebp), %eax -; SSE-NEXT: cmpl 12(%ebp), %eax +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: jbe .LBB3_1 ; SSE-NEXT: # %bb.2: ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero @@ -212,8 +197,7 @@ ; SSE-NEXT: .LBB3_3: ; SSE-NEXT: movsd %xmm0, (%esp) ; SSE-NEXT: fldl (%esp) -; SSE-NEXT: movl %ebp, %esp -; SSE-NEXT: popl %ebp +; SSE-NEXT: addl $8, %esp ; SSE-NEXT: retl ; ; NOSSE2-LABEL: test4: @@ -260,12 +244,9 @@ define double @test5(i32 %a, i32 %b, double %x) nounwind { ; SSE-LABEL: test5: ; SSE: # %bb.0: -; SSE-NEXT: pushl %ebp -; SSE-NEXT: movl %esp, %ebp -; SSE-NEXT: andl $-8, %esp ; SSE-NEXT: subl $8, %esp -; SSE-NEXT: movl 8(%ebp), %eax -; SSE-NEXT: cmpl 12(%ebp), %eax +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: cmpl 
{{[0-9]+}}(%esp), %eax ; SSE-NEXT: jg .LBB4_1 ; SSE-NEXT: # %bb.2: ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero @@ -275,8 +256,7 @@ ; SSE-NEXT: .LBB4_3: ; SSE-NEXT: movsd %xmm0, (%esp) ; SSE-NEXT: fldl (%esp) -; SSE-NEXT: movl %ebp, %esp -; SSE-NEXT: popl %ebp +; SSE-NEXT: addl $8, %esp ; SSE-NEXT: retl ; ; NOSSE2-LABEL: test5: @@ -327,12 +307,9 @@ define double @test6(i32 %a, i32 %b, double %x) nounwind { ; SSE-LABEL: test6: ; SSE: # %bb.0: -; SSE-NEXT: pushl %ebp -; SSE-NEXT: movl %esp, %ebp -; SSE-NEXT: andl $-8, %esp ; SSE-NEXT: subl $8, %esp -; SSE-NEXT: movl 8(%ebp), %eax -; SSE-NEXT: cmpl 12(%ebp), %eax +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: jge .LBB5_1 ; SSE-NEXT: # %bb.2: ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero @@ -342,8 +319,7 @@ ; SSE-NEXT: .LBB5_3: ; SSE-NEXT: movsd %xmm0, (%esp) ; SSE-NEXT: fldl (%esp) -; SSE-NEXT: movl %ebp, %esp -; SSE-NEXT: popl %ebp +; SSE-NEXT: addl $8, %esp ; SSE-NEXT: retl ; ; NOSSE2-LABEL: test6: @@ -394,12 +370,9 @@ define double @test7(i32 %a, i32 %b, double %x) nounwind { ; SSE-LABEL: test7: ; SSE: # %bb.0: -; SSE-NEXT: pushl %ebp -; SSE-NEXT: movl %esp, %ebp -; SSE-NEXT: andl $-8, %esp ; SSE-NEXT: subl $8, %esp -; SSE-NEXT: movl 8(%ebp), %eax -; SSE-NEXT: cmpl 12(%ebp), %eax +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: jl .LBB6_1 ; SSE-NEXT: # %bb.2: ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero @@ -409,8 +382,7 @@ ; SSE-NEXT: .LBB6_3: ; SSE-NEXT: movsd %xmm0, (%esp) ; SSE-NEXT: fldl (%esp) -; SSE-NEXT: movl %ebp, %esp -; SSE-NEXT: popl %ebp +; SSE-NEXT: addl $8, %esp ; SSE-NEXT: retl ; ; NOSSE2-LABEL: test7: @@ -461,12 +433,9 @@ define double @test8(i32 %a, i32 %b, double %x) nounwind { ; SSE-LABEL: test8: ; SSE: # %bb.0: -; SSE-NEXT: pushl %ebp -; SSE-NEXT: movl %esp, %ebp -; SSE-NEXT: andl $-8, %esp ; SSE-NEXT: subl $8, %esp -; SSE-NEXT: movl 8(%ebp), %eax -; SSE-NEXT: cmpl 12(%ebp), %eax +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; SSE-NEXT: jle .LBB7_1 ; SSE-NEXT: # %bb.2: ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero @@ -476,8 +445,7 @@ ; SSE-NEXT: .LBB7_3: ; SSE-NEXT: movsd %xmm0, (%esp) ; SSE-NEXT: fldl (%esp) -; SSE-NEXT: movl %ebp, %esp -; SSE-NEXT: popl %ebp +; SSE-NEXT: addl $8, %esp ; SSE-NEXT: retl ; ; NOSSE2-LABEL: test8: Index: llvm/test/CodeGen/X86/deopt-bundles.ll =================================================================== --- llvm/test/CodeGen/X86/deopt-bundles.ll +++ llvm/test/CodeGen/X86/deopt-bundles.ll @@ -161,14 +161,14 @@ define void @vector_deopt_bundle(<32 x i64 addrspace(1)*> %val) { ; CHECK-LABEL: _vector_deopt_bundle: -; CHECK: movaps 16(%rbp), %xmm8 -; CHECK-NEXT: movaps 32(%rbp), %xmm9 -; CHECK-NEXT: movaps 48(%rbp), %xmm10 -; CHECK-NEXT: movaps 64(%rbp), %xmm11 -; CHECK-NEXT: movaps 80(%rbp), %xmm12 -; CHECK-NEXT: movaps 96(%rbp), %xmm13 -; CHECK-NEXT: movaps 112(%rbp), %xmm14 -; CHECK-NEXT: movaps 128(%rbp), %xmm15 +; CHECK: movaps 272(%rsp), %xmm8 +; CHECK-NEXT: movaps 288(%rsp), %xmm9 +; CHECK-NEXT: movaps 304(%rsp), %xmm10 +; CHECK-NEXT: movaps 320(%rsp), %xmm11 +; CHECK-NEXT: movaps 336(%rsp), %xmm12 +; CHECK-NEXT: movaps 352(%rsp), %xmm13 +; CHECK-NEXT: movaps 368(%rsp), %xmm14 +; CHECK-NEXT: movaps 384(%rsp), %xmm15 ; CHECK-NEXT: movaps %xmm15, 240(%rsp) ; CHECK-NEXT: movaps %xmm14, 224(%rsp) ; CHECK-NEXT: movaps %xmm13, 208(%rsp) Index: llvm/test/CodeGen/X86/extractelement-fp.ll =================================================================== --- 
llvm/test/CodeGen/X86/extractelement-fp.ll +++ llvm/test/CodeGen/X86/extractelement-fp.ll @@ -34,17 +34,13 @@ ; ; X86-LABEL: fneg_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vmovddup {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0] ; X86-NEXT: # xmm1 = mem[0,0] ; X86-NEXT: vxorps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovlps %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v = fneg <4 x double> %x @@ -80,15 +76,11 @@ ; ; X86-LABEL: fadd_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v = fadd <4 x double> %x, %y @@ -124,15 +116,11 @@ ; ; X86-LABEL: fsub_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vsubsd %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v = fsub <4 x double> %x, %y @@ -168,15 +156,11 @@ ; ; X86-LABEL: fmul_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v = fmul <4 x double> %x, %y @@ -212,15 +196,11 @@ ; ; X86-LABEL: fdiv_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vdivsd %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v = fdiv <4 x double> %x, %y @@ -424,15 +404,11 @@ ; ; X86-LABEL: fsqrt_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %x) @@ -506,15 +482,11 @@ ; ; X86-LABEL: fma_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vfmadd213sd {{.*#+}} xmm1 = (xmm0 * xmm1) + xmm2 ; X86-NEXT: vmovsd %xmm1, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v = call <4 x double> @llvm.fma.v4f64(<4 x double> %x, <4 x double> %y, <4 x double> %z) @@ -552,15 +524,11 @@ ; ; X86-LABEL: fabs_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0 ; X86-NEXT: vmovlps %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; 
X86-NEXT: vzeroupper ; X86-NEXT: retl %v = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) @@ -602,17 +570,13 @@ ; ; X86-LABEL: fmaxnum_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 ; X86-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 ; X86-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 ; X86-NEXT: vmovlpd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %x, <4 x double> %y) @@ -654,17 +618,13 @@ ; ; X86-LABEL: fminnum_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vminsd %xmm0, %xmm1, %xmm2 ; X86-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 ; X86-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 ; X86-NEXT: vmovlpd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v = call <4 x double> @llvm.minnum.v4f64(<4 x double> %x, <4 x double> %y) @@ -725,15 +685,11 @@ ; ; X86-LABEL: maxpd_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: vzeroupper ; X86-NEXT: retl %cmp = fcmp ogt <4 x double> %x, %y @@ -771,15 +727,11 @@ ; ; X86-LABEL: minpd_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: vzeroupper ; X86-NEXT: retl %cmp = fcmp olt <4 x double> %x, %y @@ -826,17 +778,13 @@ ; ; X86-LABEL: copysign_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vandps {{\.LCPI.*}}, %xmm1, %xmm1 ; X86-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0 ; X86-NEXT: vorps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovlps %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v = call <4 x double> @llvm.copysign.v4f64(<4 x double> %x, <4 x double> %y) @@ -872,15 +820,11 @@ ; ; X86-LABEL: floor_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) @@ -916,15 +860,11 @@ ; ; X86-LABEL: ceil_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) @@ -960,15 +900,11 @@ ; ; 
X86-LABEL: trunc_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v = call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) @@ -1004,15 +940,11 @@ ; ; X86-LABEL: rint_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v = call <4 x double> @llvm.rint.v4f64(<4 x double> %x) @@ -1048,15 +980,11 @@ ; ; X86-LABEL: nearbyint_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %x) @@ -1107,9 +1035,6 @@ ; ; X86-LABEL: round_v4f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: vandpd {{\.LCPI.*}}, %xmm0, %xmm1 ; X86-NEXT: vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1] @@ -1119,8 +1044,7 @@ ; X86-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: vzeroupper ; X86-NEXT: retl %v = call <4 x double> @llvm.round.v4f64(<4 x double> %x) Index: llvm/test/CodeGen/X86/extractelement-index.ll =================================================================== --- llvm/test/CodeGen/X86/extractelement-index.ll +++ llvm/test/CodeGen/X86/extractelement-index.ll @@ -443,16 +443,10 @@ define i8 @extractelement_v32i8_var(<32 x i8> %a, i256 %i) nounwind { ; SSE-LABEL: extractelement_v32i8_var: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rbp -; SSE-NEXT: movq %rsp, %rbp -; SSE-NEXT: andq $-32, %rsp -; SSE-NEXT: subq $64, %rsp ; SSE-NEXT: andl $31, %edi -; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, (%rsp) -; SSE-NEXT: movb (%rsp,%rdi), %al -; SSE-NEXT: movq %rbp, %rsp -; SSE-NEXT: popq %rbp +; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movb -40(%rsp,%rdi), %al ; SSE-NEXT: retq ; ; AVX-LABEL: extractelement_v32i8_var: @@ -493,16 +487,10 @@ define i16 @extractelement_v16i16_var(<16 x i16> %a, i256 %i) nounwind { ; SSE-LABEL: extractelement_v16i16_var: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rbp -; SSE-NEXT: movq %rsp, %rbp -; SSE-NEXT: andq $-32, %rsp -; SSE-NEXT: subq $64, %rsp ; SSE-NEXT: andl $15, %edi -; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, (%rsp) -; SSE-NEXT: movzwl (%rsp,%rdi,2), %eax -; SSE-NEXT: movq %rbp, %rsp -; SSE-NEXT: popq %rbp +; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movzwl -40(%rsp,%rdi,2), %eax ; SSE-NEXT: retq ; ; AVX-LABEL: extractelement_v16i16_var: @@ -543,16 +531,10 @@ define i32 @extractelement_v8i32_var(<8 x i32> %a, i256 %i) nounwind { ; 
SSE-LABEL: extractelement_v8i32_var: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rbp -; SSE-NEXT: movq %rsp, %rbp -; SSE-NEXT: andq $-32, %rsp -; SSE-NEXT: subq $64, %rsp ; SSE-NEXT: andl $7, %edi -; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, (%rsp) -; SSE-NEXT: movl (%rsp,%rdi,4), %eax -; SSE-NEXT: movq %rbp, %rsp -; SSE-NEXT: popq %rbp +; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movl -40(%rsp,%rdi,4), %eax ; SSE-NEXT: retq ; ; AVX-LABEL: extractelement_v8i32_var: @@ -593,16 +575,10 @@ define i64 @extractelement_v4i64_var(<4 x i64> %a, i256 %i) nounwind { ; SSE-LABEL: extractelement_v4i64_var: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rbp -; SSE-NEXT: movq %rsp, %rbp -; SSE-NEXT: andq $-32, %rsp -; SSE-NEXT: subq $64, %rsp ; SSE-NEXT: andl $3, %edi -; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, (%rsp) -; SSE-NEXT: movq (%rsp,%rdi,8), %rax -; SSE-NEXT: movq %rbp, %rsp -; SSE-NEXT: popq %rbp +; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE-NEXT: movq -40(%rsp,%rdi,8), %rax ; SSE-NEXT: retq ; ; AVX-LABEL: extractelement_v4i64_var: Index: llvm/test/CodeGen/X86/fast-isel-fneg.ll =================================================================== --- llvm/test/CodeGen/X86/fast-isel-fneg.ll +++ llvm/test/CodeGen/X86/fast-isel-fneg.ll @@ -13,16 +13,12 @@ ; ; SSE2-LABEL: doo: ; SSE2: # %bb.0: -; SSE2-NEXT: pushl %ebp -; SSE2-NEXT: movl %esp, %ebp -; SSE2-NEXT: andl $-8, %esp ; SSE2-NEXT: subl $8, %esp ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE2-NEXT: xorps {{\.LCPI.*}}, %xmm0 ; SSE2-NEXT: movlps %xmm0, (%esp) ; SSE2-NEXT: fldl (%esp) -; SSE2-NEXT: movl %ebp, %esp -; SSE2-NEXT: popl %ebp +; SSE2-NEXT: addl $8, %esp ; SSE2-NEXT: retl %y = fsub double -0.0, %x ret double %y @@ -111,16 +107,12 @@ ; ; SSE2-LABEL: too: ; SSE2: # %bb.0: -; SSE2-NEXT: pushl %ebp -; SSE2-NEXT: movl %esp, %ebp -; SSE2-NEXT: andl $-8, %esp ; SSE2-NEXT: subl $8, %esp ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE2-NEXT: xorps {{\.LCPI.*}}, %xmm0 ; SSE2-NEXT: movlps %xmm0, (%esp) ; SSE2-NEXT: fldl (%esp) -; SSE2-NEXT: movl %ebp, %esp -; SSE2-NEXT: popl %ebp +; SSE2-NEXT: addl $8, %esp ; SSE2-NEXT: retl %y = fneg double %x ret double %y Index: llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll =================================================================== --- llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll +++ llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll @@ -20,36 +20,24 @@ ; ; SSE2_X86-LABEL: int_to_double_rr: ; SSE2_X86: # %bb.0: # %entry -; SSE2_X86-NEXT: pushl %ebp -; SSE2_X86-NEXT: .cfi_def_cfa_offset 8 -; SSE2_X86-NEXT: .cfi_offset %ebp, -8 -; SSE2_X86-NEXT: movl %esp, %ebp -; SSE2_X86-NEXT: .cfi_def_cfa_register %ebp -; SSE2_X86-NEXT: andl $-8, %esp ; SSE2_X86-NEXT: subl $8, %esp -; SSE2_X86-NEXT: cvtsi2sdl 8(%ebp), %xmm0 +; SSE2_X86-NEXT: .cfi_def_cfa_offset 12 +; SSE2_X86-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 ; SSE2_X86-NEXT: movsd %xmm0, (%esp) ; SSE2_X86-NEXT: fldl (%esp) -; SSE2_X86-NEXT: movl %ebp, %esp -; SSE2_X86-NEXT: popl %ebp -; SSE2_X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE2_X86-NEXT: addl $8, %esp +; SSE2_X86-NEXT: .cfi_def_cfa_offset 4 ; SSE2_X86-NEXT: retl ; ; AVX_X86-LABEL: int_to_double_rr: ; AVX_X86: # %bb.0: # %entry -; AVX_X86-NEXT: pushl %ebp -; AVX_X86-NEXT: .cfi_def_cfa_offset 8 -; AVX_X86-NEXT: .cfi_offset %ebp, -8 -; AVX_X86-NEXT: movl %esp, %ebp -; AVX_X86-NEXT: .cfi_def_cfa_register %ebp -; AVX_X86-NEXT: andl 
$-8, %esp ; AVX_X86-NEXT: subl $8, %esp -; AVX_X86-NEXT: vcvtsi2sdl 8(%ebp), %xmm0, %xmm0 +; AVX_X86-NEXT: .cfi_def_cfa_offset 12 +; AVX_X86-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX_X86-NEXT: vmovsd %xmm0, (%esp) ; AVX_X86-NEXT: fldl (%esp) -; AVX_X86-NEXT: movl %ebp, %esp -; AVX_X86-NEXT: popl %ebp -; AVX_X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX_X86-NEXT: addl $8, %esp +; AVX_X86-NEXT: .cfi_def_cfa_offset 4 ; AVX_X86-NEXT: retl entry: %0 = sitofp i32 %a to double @@ -69,38 +57,26 @@ ; ; SSE2_X86-LABEL: int_to_double_rm: ; SSE2_X86: # %bb.0: # %entry -; SSE2_X86-NEXT: pushl %ebp -; SSE2_X86-NEXT: .cfi_def_cfa_offset 8 -; SSE2_X86-NEXT: .cfi_offset %ebp, -8 -; SSE2_X86-NEXT: movl %esp, %ebp -; SSE2_X86-NEXT: .cfi_def_cfa_register %ebp -; SSE2_X86-NEXT: andl $-8, %esp ; SSE2_X86-NEXT: subl $8, %esp -; SSE2_X86-NEXT: movl 8(%ebp), %eax +; SSE2_X86-NEXT: .cfi_def_cfa_offset 12 +; SSE2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE2_X86-NEXT: cvtsi2sdl (%eax), %xmm0 ; SSE2_X86-NEXT: movsd %xmm0, (%esp) ; SSE2_X86-NEXT: fldl (%esp) -; SSE2_X86-NEXT: movl %ebp, %esp -; SSE2_X86-NEXT: popl %ebp -; SSE2_X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE2_X86-NEXT: addl $8, %esp +; SSE2_X86-NEXT: .cfi_def_cfa_offset 4 ; SSE2_X86-NEXT: retl ; ; AVX_X86-LABEL: int_to_double_rm: ; AVX_X86: # %bb.0: # %entry -; AVX_X86-NEXT: pushl %ebp -; AVX_X86-NEXT: .cfi_def_cfa_offset 8 -; AVX_X86-NEXT: .cfi_offset %ebp, -8 -; AVX_X86-NEXT: movl %esp, %ebp -; AVX_X86-NEXT: .cfi_def_cfa_register %ebp -; AVX_X86-NEXT: andl $-8, %esp ; AVX_X86-NEXT: subl $8, %esp -; AVX_X86-NEXT: movl 8(%ebp), %eax +; AVX_X86-NEXT: .cfi_def_cfa_offset 12 +; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; AVX_X86-NEXT: vcvtsi2sdl (%eax), %xmm0, %xmm0 ; AVX_X86-NEXT: vmovsd %xmm0, (%esp) ; AVX_X86-NEXT: fldl (%esp) -; AVX_X86-NEXT: movl %ebp, %esp -; AVX_X86-NEXT: popl %ebp -; AVX_X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX_X86-NEXT: addl $8, %esp +; AVX_X86-NEXT: .cfi_def_cfa_offset 4 ; AVX_X86-NEXT: retl entry: %0 = load i32, i32* %a @@ -121,38 +97,26 @@ ; ; SSE2_X86-LABEL: int_to_double_rm_optsize: ; SSE2_X86: # %bb.0: # %entry -; SSE2_X86-NEXT: pushl %ebp -; SSE2_X86-NEXT: .cfi_def_cfa_offset 8 -; SSE2_X86-NEXT: .cfi_offset %ebp, -8 -; SSE2_X86-NEXT: movl %esp, %ebp -; SSE2_X86-NEXT: .cfi_def_cfa_register %ebp -; SSE2_X86-NEXT: andl $-8, %esp ; SSE2_X86-NEXT: subl $8, %esp -; SSE2_X86-NEXT: movl 8(%ebp), %eax +; SSE2_X86-NEXT: .cfi_def_cfa_offset 12 +; SSE2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE2_X86-NEXT: cvtsi2sdl (%eax), %xmm0 ; SSE2_X86-NEXT: movsd %xmm0, (%esp) ; SSE2_X86-NEXT: fldl (%esp) -; SSE2_X86-NEXT: movl %ebp, %esp -; SSE2_X86-NEXT: popl %ebp -; SSE2_X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE2_X86-NEXT: addl $8, %esp +; SSE2_X86-NEXT: .cfi_def_cfa_offset 4 ; SSE2_X86-NEXT: retl ; ; AVX_X86-LABEL: int_to_double_rm_optsize: ; AVX_X86: # %bb.0: # %entry -; AVX_X86-NEXT: pushl %ebp -; AVX_X86-NEXT: .cfi_def_cfa_offset 8 -; AVX_X86-NEXT: .cfi_offset %ebp, -8 -; AVX_X86-NEXT: movl %esp, %ebp -; AVX_X86-NEXT: .cfi_def_cfa_register %ebp -; AVX_X86-NEXT: andl $-8, %esp ; AVX_X86-NEXT: subl $8, %esp -; AVX_X86-NEXT: movl 8(%ebp), %eax +; AVX_X86-NEXT: .cfi_def_cfa_offset 12 +; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; AVX_X86-NEXT: vcvtsi2sdl (%eax), %xmm0, %xmm0 ; AVX_X86-NEXT: vmovsd %xmm0, (%esp) ; AVX_X86-NEXT: fldl (%esp) -; AVX_X86-NEXT: movl %ebp, %esp -; AVX_X86-NEXT: popl %ebp -; AVX_X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX_X86-NEXT: addl $8, %esp +; AVX_X86-NEXT: .cfi_def_cfa_offset 4 ; AVX_X86-NEXT: retl entry: %0 = load i32, i32* %a 
Index: llvm/test/CodeGen/X86/fast-isel-uint-float-conversion.ll =================================================================== --- llvm/test/CodeGen/X86/fast-isel-uint-float-conversion.ll +++ llvm/test/CodeGen/X86/fast-isel-uint-float-conversion.ll @@ -11,19 +11,13 @@ ; ; AVX_X86-LABEL: int_to_double_rr: ; AVX_X86: # %bb.0: # %entry -; AVX_X86-NEXT: pushl %ebp -; AVX_X86-NEXT: .cfi_def_cfa_offset 8 -; AVX_X86-NEXT: .cfi_offset %ebp, -8 -; AVX_X86-NEXT: movl %esp, %ebp -; AVX_X86-NEXT: .cfi_def_cfa_register %ebp -; AVX_X86-NEXT: andl $-8, %esp ; AVX_X86-NEXT: subl $8, %esp -; AVX_X86-NEXT: vcvtusi2sdl 8(%ebp), %xmm0, %xmm0 +; AVX_X86-NEXT: .cfi_def_cfa_offset 12 +; AVX_X86-NEXT: vcvtusi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX_X86-NEXT: vmovsd %xmm0, (%esp) ; AVX_X86-NEXT: fldl (%esp) -; AVX_X86-NEXT: movl %ebp, %esp -; AVX_X86-NEXT: popl %ebp -; AVX_X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX_X86-NEXT: addl $8, %esp +; AVX_X86-NEXT: .cfi_def_cfa_offset 4 ; AVX_X86-NEXT: retl entry: %0 = uitofp i32 %a to double @@ -38,20 +32,14 @@ ; ; AVX_X86-LABEL: int_to_double_rm: ; AVX_X86: # %bb.0: # %entry -; AVX_X86-NEXT: pushl %ebp -; AVX_X86-NEXT: .cfi_def_cfa_offset 8 -; AVX_X86-NEXT: .cfi_offset %ebp, -8 -; AVX_X86-NEXT: movl %esp, %ebp -; AVX_X86-NEXT: .cfi_def_cfa_register %ebp -; AVX_X86-NEXT: andl $-8, %esp ; AVX_X86-NEXT: subl $8, %esp -; AVX_X86-NEXT: movl 8(%ebp), %eax +; AVX_X86-NEXT: .cfi_def_cfa_offset 12 +; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; AVX_X86-NEXT: vcvtusi2sdl (%eax), %xmm0, %xmm0 ; AVX_X86-NEXT: vmovsd %xmm0, (%esp) ; AVX_X86-NEXT: fldl (%esp) -; AVX_X86-NEXT: movl %ebp, %esp -; AVX_X86-NEXT: popl %ebp -; AVX_X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX_X86-NEXT: addl $8, %esp +; AVX_X86-NEXT: .cfi_def_cfa_offset 4 ; AVX_X86-NEXT: retl entry: %0 = load i32, i32* %a @@ -67,20 +55,14 @@ ; ; AVX_X86-LABEL: int_to_double_rm_optsize: ; AVX_X86: # %bb.0: # %entry -; AVX_X86-NEXT: pushl %ebp -; AVX_X86-NEXT: .cfi_def_cfa_offset 8 -; AVX_X86-NEXT: .cfi_offset %ebp, -8 -; AVX_X86-NEXT: movl %esp, %ebp -; AVX_X86-NEXT: .cfi_def_cfa_register %ebp -; AVX_X86-NEXT: andl $-8, %esp ; AVX_X86-NEXT: subl $8, %esp -; AVX_X86-NEXT: movl 8(%ebp), %eax +; AVX_X86-NEXT: .cfi_def_cfa_offset 12 +; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; AVX_X86-NEXT: vcvtusi2sdl (%eax), %xmm0, %xmm0 ; AVX_X86-NEXT: vmovsd %xmm0, (%esp) ; AVX_X86-NEXT: fldl (%esp) -; AVX_X86-NEXT: movl %ebp, %esp -; AVX_X86-NEXT: popl %ebp -; AVX_X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX_X86-NEXT: addl $8, %esp +; AVX_X86-NEXT: .cfi_def_cfa_offset 4 ; AVX_X86-NEXT: retl entry: %0 = load i32, i32* %a Index: llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll =================================================================== --- llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll +++ llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll @@ -691,21 +691,15 @@ define double @sitofp_i8tof64(i8 %x) #0 { ; SSE-X86-LABEL: sitofp_i8tof64: ; SSE-X86: # %bb.0: -; SSE-X86-NEXT: pushl %ebp -; SSE-X86-NEXT: .cfi_def_cfa_offset 8 -; SSE-X86-NEXT: .cfi_offset %ebp, -8 -; SSE-X86-NEXT: movl %esp, %ebp -; SSE-X86-NEXT: .cfi_def_cfa_register %ebp -; SSE-X86-NEXT: andl $-8, %esp ; SSE-X86-NEXT: subl $8, %esp -; SSE-X86-NEXT: movsbl 8(%ebp), %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 12 +; SSE-X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax ; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) ; SSE-X86-NEXT: wait -; SSE-X86-NEXT: movl %ebp, %esp -; SSE-X86-NEXT: popl %ebp -; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; 
SSE-X86-NEXT: addl $8, %esp +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE-X86-NEXT: retl ; ; SSE-X64-LABEL: sitofp_i8tof64: @@ -716,21 +710,15 @@ ; ; AVX-X86-LABEL: sitofp_i8tof64: ; AVX-X86: # %bb.0: -; AVX-X86-NEXT: pushl %ebp -; AVX-X86-NEXT: .cfi_def_cfa_offset 8 -; AVX-X86-NEXT: .cfi_offset %ebp, -8 -; AVX-X86-NEXT: movl %esp, %ebp -; AVX-X86-NEXT: .cfi_def_cfa_register %ebp -; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $8, %esp -; AVX-X86-NEXT: movsbl 8(%ebp), %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 12 +; AVX-X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax ; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) ; AVX-X86-NEXT: wait -; AVX-X86-NEXT: movl %ebp, %esp -; AVX-X86-NEXT: popl %ebp -; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: addl $8, %esp +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl ; ; AVX-X64-LABEL: sitofp_i8tof64: @@ -759,21 +747,15 @@ define double @sitofp_i16tof64(i16 %x) #0 { ; SSE-X86-LABEL: sitofp_i16tof64: ; SSE-X86: # %bb.0: -; SSE-X86-NEXT: pushl %ebp -; SSE-X86-NEXT: .cfi_def_cfa_offset 8 -; SSE-X86-NEXT: .cfi_offset %ebp, -8 -; SSE-X86-NEXT: movl %esp, %ebp -; SSE-X86-NEXT: .cfi_def_cfa_register %ebp -; SSE-X86-NEXT: andl $-8, %esp ; SSE-X86-NEXT: subl $8, %esp -; SSE-X86-NEXT: movswl 8(%ebp), %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 12 +; SSE-X86-NEXT: movswl {{[0-9]+}}(%esp), %eax ; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) ; SSE-X86-NEXT: wait -; SSE-X86-NEXT: movl %ebp, %esp -; SSE-X86-NEXT: popl %ebp -; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: addl $8, %esp +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE-X86-NEXT: retl ; ; SSE-X64-LABEL: sitofp_i16tof64: @@ -784,21 +766,15 @@ ; ; AVX-X86-LABEL: sitofp_i16tof64: ; AVX-X86: # %bb.0: -; AVX-X86-NEXT: pushl %ebp -; AVX-X86-NEXT: .cfi_def_cfa_offset 8 -; AVX-X86-NEXT: .cfi_offset %ebp, -8 -; AVX-X86-NEXT: movl %esp, %ebp -; AVX-X86-NEXT: .cfi_def_cfa_register %ebp -; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $8, %esp -; AVX-X86-NEXT: movswl 8(%ebp), %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 12 +; AVX-X86-NEXT: movswl {{[0-9]+}}(%esp), %eax ; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) ; AVX-X86-NEXT: wait -; AVX-X86-NEXT: movl %ebp, %esp -; AVX-X86-NEXT: popl %ebp -; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: addl $8, %esp +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl ; ; AVX-X64-LABEL: sitofp_i16tof64: @@ -827,20 +803,14 @@ define double @sitofp_i32tof64(i32 %x) #0 { ; SSE-X86-LABEL: sitofp_i32tof64: ; SSE-X86: # %bb.0: -; SSE-X86-NEXT: pushl %ebp -; SSE-X86-NEXT: .cfi_def_cfa_offset 8 -; SSE-X86-NEXT: .cfi_offset %ebp, -8 -; SSE-X86-NEXT: movl %esp, %ebp -; SSE-X86-NEXT: .cfi_def_cfa_register %ebp -; SSE-X86-NEXT: andl $-8, %esp ; SSE-X86-NEXT: subl $8, %esp -; SSE-X86-NEXT: cvtsi2sdl 8(%ebp), %xmm0 +; SSE-X86-NEXT: .cfi_def_cfa_offset 12 +; SSE-X86-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) ; SSE-X86-NEXT: wait -; SSE-X86-NEXT: movl %ebp, %esp -; SSE-X86-NEXT: popl %ebp -; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: addl $8, %esp +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE-X86-NEXT: retl ; ; SSE-X64-LABEL: sitofp_i32tof64: @@ -850,20 +820,14 @@ ; ; AVX-X86-LABEL: sitofp_i32tof64: ; AVX-X86: # %bb.0: -; AVX-X86-NEXT: pushl %ebp -; AVX-X86-NEXT: .cfi_def_cfa_offset 8 -; AVX-X86-NEXT: .cfi_offset %ebp, -8 
-; AVX-X86-NEXT: movl %esp, %ebp -; AVX-X86-NEXT: .cfi_def_cfa_register %ebp -; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $8, %esp -; AVX-X86-NEXT: vcvtsi2sdl 8(%ebp), %xmm0, %xmm0 +; AVX-X86-NEXT: .cfi_def_cfa_offset 12 +; AVX-X86-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) ; AVX-X86-NEXT: wait -; AVX-X86-NEXT: movl %ebp, %esp -; AVX-X86-NEXT: popl %ebp -; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: addl $8, %esp +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl ; ; AVX-X64-LABEL: sitofp_i32tof64: @@ -949,23 +913,17 @@ define double @uitofp_i1tof64(i1 %x) #0 { ; SSE-X86-LABEL: uitofp_i1tof64: ; SSE-X86: # %bb.0: -; SSE-X86-NEXT: pushl %ebp -; SSE-X86-NEXT: .cfi_def_cfa_offset 8 -; SSE-X86-NEXT: .cfi_offset %ebp, -8 -; SSE-X86-NEXT: movl %esp, %ebp -; SSE-X86-NEXT: .cfi_def_cfa_register %ebp -; SSE-X86-NEXT: andl $-8, %esp ; SSE-X86-NEXT: subl $8, %esp -; SSE-X86-NEXT: movb 8(%ebp), %al +; SSE-X86-NEXT: .cfi_def_cfa_offset 12 +; SSE-X86-NEXT: movb {{[0-9]+}}(%esp), %al ; SSE-X86-NEXT: andb $1, %al ; SSE-X86-NEXT: movzbl %al, %eax ; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) ; SSE-X86-NEXT: wait -; SSE-X86-NEXT: movl %ebp, %esp -; SSE-X86-NEXT: popl %ebp -; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: addl $8, %esp +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE-X86-NEXT: retl ; ; SSE-X64-LABEL: uitofp_i1tof64: @@ -976,23 +934,17 @@ ; ; AVX-X86-LABEL: uitofp_i1tof64: ; AVX-X86: # %bb.0: -; AVX-X86-NEXT: pushl %ebp -; AVX-X86-NEXT: .cfi_def_cfa_offset 8 -; AVX-X86-NEXT: .cfi_offset %ebp, -8 -; AVX-X86-NEXT: movl %esp, %ebp -; AVX-X86-NEXT: .cfi_def_cfa_register %ebp -; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $8, %esp -; AVX-X86-NEXT: movb 8(%ebp), %al +; AVX-X86-NEXT: .cfi_def_cfa_offset 12 +; AVX-X86-NEXT: movb {{[0-9]+}}(%esp), %al ; AVX-X86-NEXT: andb $1, %al ; AVX-X86-NEXT: movzbl %al, %eax ; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) ; AVX-X86-NEXT: wait -; AVX-X86-NEXT: movl %ebp, %esp -; AVX-X86-NEXT: popl %ebp -; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: addl $8, %esp +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl ; ; AVX-X64-LABEL: uitofp_i1tof64: @@ -1023,21 +975,15 @@ define double @uitofp_i8tof64(i8 %x) #0 { ; SSE-X86-LABEL: uitofp_i8tof64: ; SSE-X86: # %bb.0: -; SSE-X86-NEXT: pushl %ebp -; SSE-X86-NEXT: .cfi_def_cfa_offset 8 -; SSE-X86-NEXT: .cfi_offset %ebp, -8 -; SSE-X86-NEXT: movl %esp, %ebp -; SSE-X86-NEXT: .cfi_def_cfa_register %ebp -; SSE-X86-NEXT: andl $-8, %esp ; SSE-X86-NEXT: subl $8, %esp -; SSE-X86-NEXT: movzbl 8(%ebp), %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 12 +; SSE-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) ; SSE-X86-NEXT: wait -; SSE-X86-NEXT: movl %ebp, %esp -; SSE-X86-NEXT: popl %ebp -; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: addl $8, %esp +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE-X86-NEXT: retl ; ; SSE-X64-LABEL: uitofp_i8tof64: @@ -1048,21 +994,15 @@ ; ; AVX-X86-LABEL: uitofp_i8tof64: ; AVX-X86: # %bb.0: -; AVX-X86-NEXT: pushl %ebp -; AVX-X86-NEXT: .cfi_def_cfa_offset 8 -; AVX-X86-NEXT: .cfi_offset %ebp, -8 -; AVX-X86-NEXT: movl %esp, %ebp -; AVX-X86-NEXT: .cfi_def_cfa_register %ebp -; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $8, %esp -; AVX-X86-NEXT: movzbl 8(%ebp), %eax +; AVX-X86-NEXT: 
.cfi_def_cfa_offset 12 +; AVX-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) ; AVX-X86-NEXT: wait -; AVX-X86-NEXT: movl %ebp, %esp -; AVX-X86-NEXT: popl %ebp -; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: addl $8, %esp +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl ; ; AVX-X64-LABEL: uitofp_i8tof64: @@ -1091,21 +1031,15 @@ define double @uitofp_i16tof64(i16 %x) #0 { ; SSE-X86-LABEL: uitofp_i16tof64: ; SSE-X86: # %bb.0: -; SSE-X86-NEXT: pushl %ebp -; SSE-X86-NEXT: .cfi_def_cfa_offset 8 -; SSE-X86-NEXT: .cfi_offset %ebp, -8 -; SSE-X86-NEXT: movl %esp, %ebp -; SSE-X86-NEXT: .cfi_def_cfa_register %ebp -; SSE-X86-NEXT: andl $-8, %esp ; SSE-X86-NEXT: subl $8, %esp -; SSE-X86-NEXT: movzwl 8(%ebp), %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 12 +; SSE-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) ; SSE-X86-NEXT: wait -; SSE-X86-NEXT: movl %ebp, %esp -; SSE-X86-NEXT: popl %ebp -; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: addl $8, %esp +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE-X86-NEXT: retl ; ; SSE-X64-LABEL: uitofp_i16tof64: @@ -1116,21 +1050,15 @@ ; ; AVX-X86-LABEL: uitofp_i16tof64: ; AVX-X86: # %bb.0: -; AVX-X86-NEXT: pushl %ebp -; AVX-X86-NEXT: .cfi_def_cfa_offset 8 -; AVX-X86-NEXT: .cfi_offset %ebp, -8 -; AVX-X86-NEXT: movl %esp, %ebp -; AVX-X86-NEXT: .cfi_def_cfa_register %ebp -; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $8, %esp -; AVX-X86-NEXT: movzwl 8(%ebp), %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 12 +; AVX-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) ; AVX-X86-NEXT: wait -; AVX-X86-NEXT: movl %ebp, %esp -; AVX-X86-NEXT: popl %ebp -; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: addl $8, %esp +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl ; ; AVX-X64-LABEL: uitofp_i16tof64: @@ -1159,13 +1087,8 @@ define double @uitofp_i32tof64(i32 %x) #0 { ; SSE-X86-LABEL: uitofp_i32tof64: ; SSE-X86: # %bb.0: -; SSE-X86-NEXT: pushl %ebp -; SSE-X86-NEXT: .cfi_def_cfa_offset 8 -; SSE-X86-NEXT: .cfi_offset %ebp, -8 -; SSE-X86-NEXT: movl %esp, %ebp -; SSE-X86-NEXT: .cfi_def_cfa_register %ebp -; SSE-X86-NEXT: andl $-8, %esp ; SSE-X86-NEXT: subl $8, %esp +; SSE-X86-NEXT: .cfi_def_cfa_offset 12 ; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE-X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE-X86-NEXT: orpd %xmm0, %xmm1 @@ -1173,9 +1096,8 @@ ; SSE-X86-NEXT: movsd %xmm1, (%esp) ; SSE-X86-NEXT: fldl (%esp) ; SSE-X86-NEXT: wait -; SSE-X86-NEXT: movl %ebp, %esp -; SSE-X86-NEXT: popl %ebp -; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: addl $8, %esp +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE-X86-NEXT: retl ; ; SSE-X64-LABEL: uitofp_i32tof64: @@ -1186,13 +1108,8 @@ ; ; AVX1-X86-LABEL: uitofp_i32tof64: ; AVX1-X86: # %bb.0: -; AVX1-X86-NEXT: pushl %ebp -; AVX1-X86-NEXT: .cfi_def_cfa_offset 8 -; AVX1-X86-NEXT: .cfi_offset %ebp, -8 -; AVX1-X86-NEXT: movl %esp, %ebp -; AVX1-X86-NEXT: .cfi_def_cfa_register %ebp -; AVX1-X86-NEXT: andl $-8, %esp ; AVX1-X86-NEXT: subl $8, %esp +; AVX1-X86-NEXT: .cfi_def_cfa_offset 12 ; AVX1-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX1-X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; AVX1-X86-NEXT: vorpd %xmm0, %xmm1, %xmm1 @@ -1200,9 +1117,8 @@ ; AVX1-X86-NEXT: vmovsd %xmm0, (%esp) ; 
AVX1-X86-NEXT: fldl (%esp) ; AVX1-X86-NEXT: wait -; AVX1-X86-NEXT: movl %ebp, %esp -; AVX1-X86-NEXT: popl %ebp -; AVX1-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX1-X86-NEXT: addl $8, %esp +; AVX1-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX1-X86-NEXT: retl ; ; AVX1-X64-LABEL: uitofp_i32tof64: @@ -1213,20 +1129,14 @@ ; ; AVX512-X86-LABEL: uitofp_i32tof64: ; AVX512-X86: # %bb.0: -; AVX512-X86-NEXT: pushl %ebp -; AVX512-X86-NEXT: .cfi_def_cfa_offset 8 -; AVX512-X86-NEXT: .cfi_offset %ebp, -8 -; AVX512-X86-NEXT: movl %esp, %ebp -; AVX512-X86-NEXT: .cfi_def_cfa_register %ebp -; AVX512-X86-NEXT: andl $-8, %esp ; AVX512-X86-NEXT: subl $8, %esp -; AVX512-X86-NEXT: vcvtusi2sdl 8(%ebp), %xmm0, %xmm0 +; AVX512-X86-NEXT: .cfi_def_cfa_offset 12 +; AVX512-X86-NEXT: vcvtusi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX512-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX512-X86-NEXT: fldl (%esp) ; AVX512-X86-NEXT: wait -; AVX512-X86-NEXT: movl %ebp, %esp -; AVX512-X86-NEXT: popl %ebp -; AVX512-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX512-X86-NEXT: addl $8, %esp +; AVX512-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX512-X86-NEXT: retl ; ; AVX512-X64-LABEL: uitofp_i32tof64: @@ -1261,13 +1171,8 @@ define double @uitofp_i64tof64(i64 %x) #0 { ; SSE-X86-LABEL: uitofp_i64tof64: ; SSE-X86: # %bb.0: -; SSE-X86-NEXT: pushl %ebp -; SSE-X86-NEXT: .cfi_def_cfa_offset 8 -; SSE-X86-NEXT: .cfi_offset %ebp, -8 -; SSE-X86-NEXT: movl %esp, %ebp -; SSE-X86-NEXT: .cfi_def_cfa_register %ebp -; SSE-X86-NEXT: andl $-8, %esp ; SSE-X86-NEXT: subl $8, %esp +; SSE-X86-NEXT: .cfi_def_cfa_offset 12 ; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE-X86-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; SSE-X86-NEXT: subpd {{\.LCPI.*}}, %xmm0 @@ -1277,9 +1182,8 @@ ; SSE-X86-NEXT: movlpd %xmm1, (%esp) ; SSE-X86-NEXT: fldl (%esp) ; SSE-X86-NEXT: wait -; SSE-X86-NEXT: movl %ebp, %esp -; SSE-X86-NEXT: popl %ebp -; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: addl $8, %esp +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE-X86-NEXT: retl ; ; SSE-X64-LABEL: uitofp_i64tof64: @@ -1294,13 +1198,8 @@ ; ; AVX-X86-LABEL: uitofp_i64tof64: ; AVX-X86: # %bb.0: -; AVX-X86-NEXT: pushl %ebp -; AVX-X86-NEXT: .cfi_def_cfa_offset 8 -; AVX-X86-NEXT: .cfi_offset %ebp, -8 -; AVX-X86-NEXT: movl %esp, %ebp -; AVX-X86-NEXT: .cfi_def_cfa_register %ebp -; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: .cfi_def_cfa_offset 12 ; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-X86-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; AVX-X86-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0 @@ -1309,9 +1208,8 @@ ; AVX-X86-NEXT: vmovlpd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) ; AVX-X86-NEXT: wait -; AVX-X86-NEXT: movl %ebp, %esp -; AVX-X86-NEXT: popl %ebp -; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: addl $8, %esp +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl ; ; AVX1-X64-LABEL: uitofp_i64tof64: Index: llvm/test/CodeGen/X86/fp-strict-scalar-round.ll =================================================================== --- llvm/test/CodeGen/X86/fp-strict-scalar-round.ll +++ llvm/test/CodeGen/X86/fp-strict-scalar-round.ll @@ -61,21 +61,15 @@ define double @fceilf64(double %f) #0 { ; SSE41-X86-LABEL: fceilf64: ; SSE41-X86: # %bb.0: -; SSE41-X86-NEXT: pushl %ebp -; SSE41-X86-NEXT: .cfi_def_cfa_offset 8 -; SSE41-X86-NEXT: .cfi_offset %ebp, -8 -; SSE41-X86-NEXT: movl %esp, %ebp -; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp -; SSE41-X86-NEXT: andl $-8, %esp ; SSE41-X86-NEXT: subl $8, %esp +; SSE41-X86-NEXT: .cfi_def_cfa_offset 12 ; 
SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE41-X86-NEXT: roundsd $10, %xmm0, %xmm0 ; SSE41-X86-NEXT: movsd %xmm0, (%esp) ; SSE41-X86-NEXT: fldl (%esp) ; SSE41-X86-NEXT: wait -; SSE41-X86-NEXT: movl %ebp, %esp -; SSE41-X86-NEXT: popl %ebp -; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE41-X86-NEXT: addl $8, %esp +; SSE41-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE41-X86-NEXT: retl ; ; SSE41-X64-LABEL: fceilf64: @@ -85,21 +79,15 @@ ; ; AVX-X86-LABEL: fceilf64: ; AVX-X86: # %bb.0: -; AVX-X86-NEXT: pushl %ebp -; AVX-X86-NEXT: .cfi_def_cfa_offset 8 -; AVX-X86-NEXT: .cfi_offset %ebp, -8 -; AVX-X86-NEXT: movl %esp, %ebp -; AVX-X86-NEXT: .cfi_def_cfa_register %ebp -; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: .cfi_def_cfa_offset 12 ; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-X86-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) ; AVX-X86-NEXT: wait -; AVX-X86-NEXT: movl %ebp, %esp -; AVX-X86-NEXT: popl %ebp -; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: addl $8, %esp +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl ; ; AVX-X64-LABEL: fceilf64: @@ -155,21 +143,15 @@ define double @ffloorf64(double %f) #0 { ; SSE41-X86-LABEL: ffloorf64: ; SSE41-X86: # %bb.0: -; SSE41-X86-NEXT: pushl %ebp -; SSE41-X86-NEXT: .cfi_def_cfa_offset 8 -; SSE41-X86-NEXT: .cfi_offset %ebp, -8 -; SSE41-X86-NEXT: movl %esp, %ebp -; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp -; SSE41-X86-NEXT: andl $-8, %esp ; SSE41-X86-NEXT: subl $8, %esp +; SSE41-X86-NEXT: .cfi_def_cfa_offset 12 ; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE41-X86-NEXT: roundsd $9, %xmm0, %xmm0 ; SSE41-X86-NEXT: movsd %xmm0, (%esp) ; SSE41-X86-NEXT: fldl (%esp) ; SSE41-X86-NEXT: wait -; SSE41-X86-NEXT: movl %ebp, %esp -; SSE41-X86-NEXT: popl %ebp -; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE41-X86-NEXT: addl $8, %esp +; SSE41-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE41-X86-NEXT: retl ; ; SSE41-X64-LABEL: ffloorf64: @@ -179,21 +161,15 @@ ; ; AVX-X86-LABEL: ffloorf64: ; AVX-X86: # %bb.0: -; AVX-X86-NEXT: pushl %ebp -; AVX-X86-NEXT: .cfi_def_cfa_offset 8 -; AVX-X86-NEXT: .cfi_offset %ebp, -8 -; AVX-X86-NEXT: movl %esp, %ebp -; AVX-X86-NEXT: .cfi_def_cfa_register %ebp -; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: .cfi_def_cfa_offset 12 ; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-X86-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) ; AVX-X86-NEXT: wait -; AVX-X86-NEXT: movl %ebp, %esp -; AVX-X86-NEXT: popl %ebp -; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: addl $8, %esp +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl ; ; AVX-X64-LABEL: ffloorf64: @@ -249,21 +225,15 @@ define double @ftruncf64(double %f) #0 { ; SSE41-X86-LABEL: ftruncf64: ; SSE41-X86: # %bb.0: -; SSE41-X86-NEXT: pushl %ebp -; SSE41-X86-NEXT: .cfi_def_cfa_offset 8 -; SSE41-X86-NEXT: .cfi_offset %ebp, -8 -; SSE41-X86-NEXT: movl %esp, %ebp -; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp -; SSE41-X86-NEXT: andl $-8, %esp ; SSE41-X86-NEXT: subl $8, %esp +; SSE41-X86-NEXT: .cfi_def_cfa_offset 12 ; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE41-X86-NEXT: roundsd $11, %xmm0, %xmm0 ; SSE41-X86-NEXT: movsd %xmm0, (%esp) ; SSE41-X86-NEXT: fldl (%esp) ; SSE41-X86-NEXT: wait -; SSE41-X86-NEXT: movl %ebp, %esp -; SSE41-X86-NEXT: popl %ebp -; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE41-X86-NEXT: addl $8, %esp +; SSE41-X86-NEXT: .cfi_def_cfa_offset 4 ; 
SSE41-X86-NEXT: retl ; ; SSE41-X64-LABEL: ftruncf64: @@ -273,21 +243,15 @@ ; ; AVX-X86-LABEL: ftruncf64: ; AVX-X86: # %bb.0: -; AVX-X86-NEXT: pushl %ebp -; AVX-X86-NEXT: .cfi_def_cfa_offset 8 -; AVX-X86-NEXT: .cfi_offset %ebp, -8 -; AVX-X86-NEXT: movl %esp, %ebp -; AVX-X86-NEXT: .cfi_def_cfa_register %ebp -; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: .cfi_def_cfa_offset 12 ; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-X86-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) ; AVX-X86-NEXT: wait -; AVX-X86-NEXT: movl %ebp, %esp -; AVX-X86-NEXT: popl %ebp -; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: addl $8, %esp +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl ; ; AVX-X64-LABEL: ftruncf64: @@ -344,21 +308,15 @@ define double @frintf64(double %f) #0 { ; SSE41-X86-LABEL: frintf64: ; SSE41-X86: # %bb.0: -; SSE41-X86-NEXT: pushl %ebp -; SSE41-X86-NEXT: .cfi_def_cfa_offset 8 -; SSE41-X86-NEXT: .cfi_offset %ebp, -8 -; SSE41-X86-NEXT: movl %esp, %ebp -; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp -; SSE41-X86-NEXT: andl $-8, %esp ; SSE41-X86-NEXT: subl $8, %esp +; SSE41-X86-NEXT: .cfi_def_cfa_offset 12 ; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE41-X86-NEXT: roundsd $4, %xmm0, %xmm0 ; SSE41-X86-NEXT: movsd %xmm0, (%esp) ; SSE41-X86-NEXT: fldl (%esp) ; SSE41-X86-NEXT: wait -; SSE41-X86-NEXT: movl %ebp, %esp -; SSE41-X86-NEXT: popl %ebp -; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE41-X86-NEXT: addl $8, %esp +; SSE41-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE41-X86-NEXT: retl ; ; SSE41-X64-LABEL: frintf64: @@ -368,21 +326,15 @@ ; ; AVX-X86-LABEL: frintf64: ; AVX-X86: # %bb.0: -; AVX-X86-NEXT: pushl %ebp -; AVX-X86-NEXT: .cfi_def_cfa_offset 8 -; AVX-X86-NEXT: .cfi_offset %ebp, -8 -; AVX-X86-NEXT: movl %esp, %ebp -; AVX-X86-NEXT: .cfi_def_cfa_register %ebp -; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: .cfi_def_cfa_offset 12 ; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-X86-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) ; AVX-X86-NEXT: wait -; AVX-X86-NEXT: movl %ebp, %esp -; AVX-X86-NEXT: popl %ebp -; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: addl $8, %esp +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl ; ; AVX-X64-LABEL: frintf64: @@ -440,21 +392,15 @@ define double @fnearbyintf64(double %f) #0 { ; SSE41-X86-LABEL: fnearbyintf64: ; SSE41-X86: # %bb.0: -; SSE41-X86-NEXT: pushl %ebp -; SSE41-X86-NEXT: .cfi_def_cfa_offset 8 -; SSE41-X86-NEXT: .cfi_offset %ebp, -8 -; SSE41-X86-NEXT: movl %esp, %ebp -; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp -; SSE41-X86-NEXT: andl $-8, %esp ; SSE41-X86-NEXT: subl $8, %esp +; SSE41-X86-NEXT: .cfi_def_cfa_offset 12 ; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE41-X86-NEXT: roundsd $12, %xmm0, %xmm0 ; SSE41-X86-NEXT: movsd %xmm0, (%esp) ; SSE41-X86-NEXT: fldl (%esp) ; SSE41-X86-NEXT: wait -; SSE41-X86-NEXT: movl %ebp, %esp -; SSE41-X86-NEXT: popl %ebp -; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE41-X86-NEXT: addl $8, %esp +; SSE41-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE41-X86-NEXT: retl ; ; SSE41-X64-LABEL: fnearbyintf64: @@ -464,21 +410,15 @@ ; ; AVX-X86-LABEL: fnearbyintf64: ; AVX-X86: # %bb.0: -; AVX-X86-NEXT: pushl %ebp -; AVX-X86-NEXT: .cfi_def_cfa_offset 8 -; AVX-X86-NEXT: .cfi_offset %ebp, -8 -; AVX-X86-NEXT: movl %esp, %ebp -; AVX-X86-NEXT: .cfi_def_cfa_register %ebp -; AVX-X86-NEXT: andl $-8, %esp ; 
AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: .cfi_def_cfa_offset 12 ; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-X86-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) ; AVX-X86-NEXT: wait -; AVX-X86-NEXT: movl %ebp, %esp -; AVX-X86-NEXT: popl %ebp -; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: addl $8, %esp +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl ; ; AVX-X64-LABEL: fnearbyintf64: Index: llvm/test/CodeGen/X86/fp-strict-scalar.ll =================================================================== --- llvm/test/CodeGen/X86/fp-strict-scalar.ll +++ llvm/test/CodeGen/X86/fp-strict-scalar.ll @@ -25,17 +25,13 @@ define double @fadd_f64(double %a, double %b) nounwind strictfp { ; SSE-X86-LABEL: fadd_f64: ; SSE-X86: # %bb.0: -; SSE-X86-NEXT: pushl %ebp -; SSE-X86-NEXT: movl %esp, %ebp -; SSE-X86-NEXT: andl $-8, %esp ; SSE-X86-NEXT: subl $8, %esp ; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-X86-NEXT: addsd 16(%ebp), %xmm0 +; SSE-X86-NEXT: addsd {{[0-9]+}}(%esp), %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) ; SSE-X86-NEXT: wait -; SSE-X86-NEXT: movl %ebp, %esp -; SSE-X86-NEXT: popl %ebp +; SSE-X86-NEXT: addl $8, %esp ; SSE-X86-NEXT: retl ; ; SSE-X64-LABEL: fadd_f64: @@ -45,17 +41,13 @@ ; ; AVX-X86-LABEL: fadd_f64: ; AVX-X86: # %bb.0: -; AVX-X86-NEXT: pushl %ebp -; AVX-X86-NEXT: movl %esp, %ebp -; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $8, %esp ; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-X86-NEXT: vaddsd 16(%ebp), %xmm0, %xmm0 +; AVX-X86-NEXT: vaddsd {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) ; AVX-X86-NEXT: wait -; AVX-X86-NEXT: movl %ebp, %esp -; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: addl $8, %esp ; AVX-X86-NEXT: retl ; ; AVX-X64-LABEL: fadd_f64: @@ -123,17 +115,13 @@ define double @fsub_f64(double %a, double %b) nounwind strictfp { ; SSE-X86-LABEL: fsub_f64: ; SSE-X86: # %bb.0: -; SSE-X86-NEXT: pushl %ebp -; SSE-X86-NEXT: movl %esp, %ebp -; SSE-X86-NEXT: andl $-8, %esp ; SSE-X86-NEXT: subl $8, %esp ; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-X86-NEXT: subsd 16(%ebp), %xmm0 +; SSE-X86-NEXT: subsd {{[0-9]+}}(%esp), %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) ; SSE-X86-NEXT: wait -; SSE-X86-NEXT: movl %ebp, %esp -; SSE-X86-NEXT: popl %ebp +; SSE-X86-NEXT: addl $8, %esp ; SSE-X86-NEXT: retl ; ; SSE-X64-LABEL: fsub_f64: @@ -143,17 +131,13 @@ ; ; AVX-X86-LABEL: fsub_f64: ; AVX-X86: # %bb.0: -; AVX-X86-NEXT: pushl %ebp -; AVX-X86-NEXT: movl %esp, %ebp -; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $8, %esp ; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-X86-NEXT: vsubsd 16(%ebp), %xmm0, %xmm0 +; AVX-X86-NEXT: vsubsd {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) ; AVX-X86-NEXT: wait -; AVX-X86-NEXT: movl %ebp, %esp -; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: addl $8, %esp ; AVX-X86-NEXT: retl ; ; AVX-X64-LABEL: fsub_f64: @@ -221,17 +205,13 @@ define double @fmul_f64(double %a, double %b) nounwind strictfp { ; SSE-X86-LABEL: fmul_f64: ; SSE-X86: # %bb.0: -; SSE-X86-NEXT: pushl %ebp -; SSE-X86-NEXT: movl %esp, %ebp -; SSE-X86-NEXT: andl $-8, %esp ; SSE-X86-NEXT: subl $8, %esp ; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-X86-NEXT: mulsd 16(%ebp), %xmm0 +; SSE-X86-NEXT: mulsd {{[0-9]+}}(%esp), %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) ; SSE-X86-NEXT: wait -; 
SSE-X86-NEXT: movl %ebp, %esp -; SSE-X86-NEXT: popl %ebp +; SSE-X86-NEXT: addl $8, %esp ; SSE-X86-NEXT: retl ; ; SSE-X64-LABEL: fmul_f64: @@ -241,17 +221,13 @@ ; ; AVX-X86-LABEL: fmul_f64: ; AVX-X86: # %bb.0: -; AVX-X86-NEXT: pushl %ebp -; AVX-X86-NEXT: movl %esp, %ebp -; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $8, %esp ; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-X86-NEXT: vmulsd 16(%ebp), %xmm0, %xmm0 +; AVX-X86-NEXT: vmulsd {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) ; AVX-X86-NEXT: wait -; AVX-X86-NEXT: movl %ebp, %esp -; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: addl $8, %esp ; AVX-X86-NEXT: retl ; ; AVX-X64-LABEL: fmul_f64: @@ -319,17 +295,13 @@ define double @fdiv_f64(double %a, double %b) nounwind strictfp { ; SSE-X86-LABEL: fdiv_f64: ; SSE-X86: # %bb.0: -; SSE-X86-NEXT: pushl %ebp -; SSE-X86-NEXT: movl %esp, %ebp -; SSE-X86-NEXT: andl $-8, %esp ; SSE-X86-NEXT: subl $8, %esp ; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-X86-NEXT: divsd 16(%ebp), %xmm0 +; SSE-X86-NEXT: divsd {{[0-9]+}}(%esp), %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) ; SSE-X86-NEXT: wait -; SSE-X86-NEXT: movl %ebp, %esp -; SSE-X86-NEXT: popl %ebp +; SSE-X86-NEXT: addl $8, %esp ; SSE-X86-NEXT: retl ; ; SSE-X64-LABEL: fdiv_f64: @@ -339,17 +311,13 @@ ; ; AVX-X86-LABEL: fdiv_f64: ; AVX-X86: # %bb.0: -; AVX-X86-NEXT: pushl %ebp -; AVX-X86-NEXT: movl %esp, %ebp -; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $8, %esp ; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-X86-NEXT: vdivsd 16(%ebp), %xmm0, %xmm0 +; AVX-X86-NEXT: vdivsd {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) ; AVX-X86-NEXT: wait -; AVX-X86-NEXT: movl %ebp, %esp -; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: addl $8, %esp ; AVX-X86-NEXT: retl ; ; AVX-X64-LABEL: fdiv_f64: @@ -632,9 +600,6 @@ ; ; AVX-X86-LABEL: fma_f64: ; AVX-X86: # %bb.0: -; AVX-X86-NEXT: pushl %ebp -; AVX-X86-NEXT: movl %esp, %ebp -; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $8, %esp ; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-X86-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero @@ -642,8 +607,7 @@ ; AVX-X86-NEXT: vmovsd %xmm1, (%esp) ; AVX-X86-NEXT: fldl (%esp) ; AVX-X86-NEXT: wait -; AVX-X86-NEXT: movl %ebp, %esp -; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: addl $8, %esp ; AVX-X86-NEXT: retl ; ; AVX-X64-LABEL: fma_f64: Index: llvm/test/CodeGen/X86/fp80-strict-scalar.ll =================================================================== --- llvm/test/CodeGen/X86/fp80-strict-scalar.ll +++ llvm/test/CodeGen/X86/fp80-strict-scalar.ll @@ -183,16 +183,12 @@ define double @fptrunc_fp80_to_f64(x86_fp80 %a) nounwind strictfp { ; X86-LABEL: fptrunc_fp80_to_f64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: fldt 8(%ebp) +; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fstpl (%esp) ; X86-NEXT: fldl (%esp) ; X86-NEXT: wait -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $8, %esp ; X86-NEXT: retl ; ; X64-LABEL: fptrunc_fp80_to_f64: Index: llvm/test/CodeGen/X86/i64-mem-copy.ll =================================================================== --- llvm/test/CodeGen/X86/i64-mem-copy.ll +++ llvm/test/CodeGen/X86/i64-mem-copy.ll @@ -109,34 +109,28 @@ define void @PR23476(<5 x i64> %in, i64* %out, i32 %index) nounwind { ; X64-LABEL: PR23476: ; X64: # %bb.0: -; X64-NEXT: pushq %rbp -; X64-NEXT: movq %rsp, %rbp -; 
X64-NEXT: andq $-64, %rsp -; X64-NEXT: subq $128, %rsp ; X64-NEXT: movq %rsi, %xmm0 ; X64-NEXT: movq %rdi, %xmm1 ; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; X64-NEXT: movq %rcx, %xmm0 ; X64-NEXT: movq %rdx, %xmm2 ; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] -; X64-NEXT: movl 16(%rbp), %eax +; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: andl $7, %eax ; X64-NEXT: movq %r8, %xmm0 -; X64-NEXT: movdqa %xmm0, {{[0-9]+}}(%rsp) -; X64-NEXT: movdqa %xmm2, {{[0-9]+}}(%rsp) -; X64-NEXT: movdqa %xmm1, (%rsp) -; X64-NEXT: movq (%rsp,%rax,8), %rax +; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq -72(%rsp,%rax,8), %rax ; X64-NEXT: movq %rax, (%r9) -; X64-NEXT: movq %rbp, %rsp -; X64-NEXT: popq %rbp ; X64-NEXT: retq ; ; X32-LABEL: PR23476: ; X32: # %bb.0: ; X32-NEXT: pushl %ebp ; X32-NEXT: movl %esp, %ebp -; X32-NEXT: andl $-64, %esp -; X32-NEXT: subl $128, %esp +; X32-NEXT: andl $-16, %esp +; X32-NEXT: subl $80, %esp ; X32-NEXT: movl 52(%ebp), %eax ; X32-NEXT: andl $7, %eax ; X32-NEXT: movl 48(%ebp), %ecx @@ -156,8 +150,8 @@ ; X32AVX: # %bb.0: ; X32AVX-NEXT: pushl %ebp ; X32AVX-NEXT: movl %esp, %ebp -; X32AVX-NEXT: andl $-64, %esp -; X32AVX-NEXT: subl $128, %esp +; X32AVX-NEXT: andl $-32, %esp +; X32AVX-NEXT: subl $96, %esp ; X32AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X32AVX-NEXT: movl 52(%ebp), %eax ; X32AVX-NEXT: andl $7, %eax Index: llvm/test/CodeGen/X86/insertelement-var-index.ll =================================================================== --- llvm/test/CodeGen/X86/insertelement-var-index.ll +++ llvm/test/CodeGen/X86/insertelement-var-index.ll @@ -262,17 +262,11 @@ define <32 x i8> @arg_i8_v32i8(i8 %x, i32 %y) nounwind { ; SSE-LABEL: arg_i8_v32i8: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rbp -; SSE-NEXT: movq %rsp, %rbp -; SSE-NEXT: andq $-32, %rsp -; SSE-NEXT: subq $64, %rsp ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: andl $31, %esi -; SSE-NEXT: movb %dil, (%rsp,%rsi) -; SSE-NEXT: movaps (%rsp), %xmm0 -; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; SSE-NEXT: movq %rbp, %rsp -; SSE-NEXT: popq %rbp +; SSE-NEXT: movb %dil, -40(%rsp,%rsi) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; ; AVX1-LABEL: arg_i8_v32i8: @@ -295,17 +289,11 @@ define <16 x i16> @arg_i16_v16i16(i16 %x, i32 %y) nounwind { ; SSE-LABEL: arg_i16_v16i16: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rbp -; SSE-NEXT: movq %rsp, %rbp -; SSE-NEXT: andq $-32, %rsp -; SSE-NEXT: subq $64, %rsp ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: andl $15, %esi -; SSE-NEXT: movw %di, (%rsp,%rsi,2) -; SSE-NEXT: movaps (%rsp), %xmm0 -; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; SSE-NEXT: movq %rbp, %rsp -; SSE-NEXT: popq %rbp +; SSE-NEXT: movw %di, -40(%rsp,%rsi,2) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; ; AVX1-LABEL: arg_i16_v16i16: @@ -328,17 +316,11 @@ define <8 x i32> @arg_i32_v8i32(i32 %x, i32 %y) nounwind { ; SSE-LABEL: arg_i32_v8i32: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rbp -; SSE-NEXT: movq %rsp, %rbp -; SSE-NEXT: andq $-32, %rsp -; SSE-NEXT: subq $64, %rsp ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: andl $7, %esi -; SSE-NEXT: movl %edi, (%rsp,%rsi,4) -; SSE-NEXT: movaps (%rsp), %xmm0 -; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; SSE-NEXT: movq %rbp, %rsp -; SSE-NEXT: popq %rbp +; SSE-NEXT: movl %edi, -40(%rsp,%rsi,4) +; SSE-NEXT: 
movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; ; AVX1-LABEL: arg_i32_v8i32: @@ -360,17 +342,11 @@ define <4 x i64> @arg_i64_v4i64(i64 %x, i32 %y) nounwind { ; SSE-LABEL: arg_i64_v4i64: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rbp -; SSE-NEXT: movq %rsp, %rbp -; SSE-NEXT: andq $-32, %rsp -; SSE-NEXT: subq $64, %rsp ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: andl $3, %esi -; SSE-NEXT: movq %rdi, (%rsp,%rsi,8) -; SSE-NEXT: movaps (%rsp), %xmm0 -; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; SSE-NEXT: movq %rbp, %rsp -; SSE-NEXT: popq %rbp +; SSE-NEXT: movq %rdi, -40(%rsp,%rsi,8) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; ; AVX1-LABEL: arg_i64_v4i64: @@ -392,17 +368,11 @@ define <8 x float> @arg_f32_v8f32(float %x, i32 %y) nounwind { ; SSE-LABEL: arg_f32_v8f32: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rbp -; SSE-NEXT: movq %rsp, %rbp -; SSE-NEXT: andq $-32, %rsp -; SSE-NEXT: subq $64, %rsp ; SSE-NEXT: # kill: def $edi killed $edi def $rdi ; SSE-NEXT: andl $7, %edi -; SSE-NEXT: movss %xmm0, (%rsp,%rdi,4) -; SSE-NEXT: movaps (%rsp), %xmm0 -; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; SSE-NEXT: movq %rbp, %rsp -; SSE-NEXT: popq %rbp +; SSE-NEXT: movss %xmm0, -40(%rsp,%rdi,4) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; ; AVX1-LABEL: arg_f32_v8f32: @@ -422,17 +392,11 @@ define <4 x double> @arg_f64_v4f64(double %x, i32 %y) nounwind { ; SSE-LABEL: arg_f64_v4f64: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rbp -; SSE-NEXT: movq %rsp, %rbp -; SSE-NEXT: andq $-32, %rsp -; SSE-NEXT: subq $64, %rsp ; SSE-NEXT: # kill: def $edi killed $edi def $rdi ; SSE-NEXT: andl $3, %edi -; SSE-NEXT: movsd %xmm0, (%rsp,%rdi,8) -; SSE-NEXT: movaps (%rsp), %xmm0 -; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; SSE-NEXT: movq %rbp, %rsp -; SSE-NEXT: popq %rbp +; SSE-NEXT: movsd %xmm0, -40(%rsp,%rdi,8) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; ; AVX1-LABEL: arg_f64_v4f64: @@ -452,18 +416,12 @@ define <32 x i8> @load_i8_v32i8(i8* %p, i32 %y) nounwind { ; SSE-LABEL: load_i8_v32i8: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rbp -; SSE-NEXT: movq %rsp, %rbp -; SSE-NEXT: andq $-32, %rsp -; SSE-NEXT: subq $64, %rsp ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: movb (%rdi), %al ; SSE-NEXT: andl $31, %esi -; SSE-NEXT: movb %al, (%rsp,%rsi) -; SSE-NEXT: movaps (%rsp), %xmm0 -; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; SSE-NEXT: movq %rbp, %rsp -; SSE-NEXT: popq %rbp +; SSE-NEXT: movb %al, -40(%rsp,%rsi) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; ; AVX1-LABEL: load_i8_v32i8: @@ -487,18 +445,12 @@ define <16 x i16> @load_i16_v16i16(i16* %p, i32 %y) nounwind { ; SSE-LABEL: load_i16_v16i16: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rbp -; SSE-NEXT: movq %rsp, %rbp -; SSE-NEXT: andq $-32, %rsp -; SSE-NEXT: subq $64, %rsp ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: movzwl (%rdi), %eax ; SSE-NEXT: andl $15, %esi -; SSE-NEXT: movw %ax, (%rsp,%rsi,2) -; SSE-NEXT: movaps (%rsp), %xmm0 -; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; SSE-NEXT: movq %rbp, %rsp -; SSE-NEXT: popq %rbp +; SSE-NEXT: movw %ax, -40(%rsp,%rsi,2) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; ; AVX1-LABEL: load_i16_v16i16: @@ -522,18 +474,12 @@ define <8 x i32> 
@load_i32_v8i32(i32* %p, i32 %y) nounwind { ; SSE-LABEL: load_i32_v8i32: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rbp -; SSE-NEXT: movq %rsp, %rbp -; SSE-NEXT: andq $-32, %rsp -; SSE-NEXT: subq $64, %rsp ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: movl (%rdi), %eax ; SSE-NEXT: andl $7, %esi -; SSE-NEXT: movl %eax, (%rsp,%rsi,4) -; SSE-NEXT: movaps (%rsp), %xmm0 -; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; SSE-NEXT: movq %rbp, %rsp -; SSE-NEXT: popq %rbp +; SSE-NEXT: movl %eax, -40(%rsp,%rsi,4) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; ; AVX-LABEL: load_i32_v8i32: @@ -548,18 +494,12 @@ define <4 x i64> @load_i64_v4i64(i64* %p, i32 %y) nounwind { ; SSE-LABEL: load_i64_v4i64: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rbp -; SSE-NEXT: movq %rsp, %rbp -; SSE-NEXT: andq $-32, %rsp -; SSE-NEXT: subq $64, %rsp ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: movq (%rdi), %rax ; SSE-NEXT: andl $3, %esi -; SSE-NEXT: movq %rax, (%rsp,%rsi,8) -; SSE-NEXT: movaps (%rsp), %xmm0 -; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; SSE-NEXT: movq %rbp, %rsp -; SSE-NEXT: popq %rbp +; SSE-NEXT: movq %rax, -40(%rsp,%rsi,8) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; ; AVX-LABEL: load_i64_v4i64: @@ -574,18 +514,12 @@ define <8 x float> @load_f32_v8f32(float* %p, i32 %y) nounwind { ; SSE-LABEL: load_f32_v8f32: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rbp -; SSE-NEXT: movq %rsp, %rbp -; SSE-NEXT: andq $-32, %rsp -; SSE-NEXT: subq $64, %rsp ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE-NEXT: andl $7, %esi -; SSE-NEXT: movss %xmm0, (%rsp,%rsi,4) -; SSE-NEXT: movaps (%rsp), %xmm0 -; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; SSE-NEXT: movq %rbp, %rsp -; SSE-NEXT: popq %rbp +; SSE-NEXT: movss %xmm0, -40(%rsp,%rsi,4) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; ; AVX-LABEL: load_f32_v8f32: @@ -600,18 +534,12 @@ define <4 x double> @load_f64_v4f64(double* %p, i32 %y) nounwind { ; SSE-LABEL: load_f64_v4f64: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rbp -; SSE-NEXT: movq %rsp, %rbp -; SSE-NEXT: andq $-32, %rsp -; SSE-NEXT: subq $64, %rsp ; SSE-NEXT: # kill: def $esi killed $esi def $rsi ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE-NEXT: andl $3, %esi -; SSE-NEXT: movsd %xmm0, (%rsp,%rsi,8) -; SSE-NEXT: movaps (%rsp), %xmm0 -; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; SSE-NEXT: movq %rbp, %rsp -; SSE-NEXT: popq %rbp +; SSE-NEXT: movsd %xmm0, -40(%rsp,%rsi,8) +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; SSE-NEXT: retq ; ; AVX-LABEL: load_f64_v4f64: Index: llvm/test/CodeGen/X86/mmx-fold-zero.ll =================================================================== --- llvm/test/CodeGen/X86/mmx-fold-zero.ll +++ llvm/test/CodeGen/X86/mmx-fold-zero.ll @@ -5,25 +5,22 @@ define double @mmx_zero(double, double, double, double) nounwind { ; X86-LABEL: mmx_zero: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movq 8(%ebp), %mm0 -; X86-NEXT: movq 16(%ebp), %mm5 +; X86-NEXT: subl $20, %esp +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm5 ; X86-NEXT: movq %mm5, (%esp) # 8-byte Spill ; X86-NEXT: movq %mm0, %mm3 ; X86-NEXT: paddd %mm5, %mm3 ; X86-NEXT: pxor %mm1, %mm1 ; X86-NEXT: movq %mm3, %mm6 ; 
X86-NEXT: pmuludq %mm1, %mm6 -; X86-NEXT: movq 24(%ebp), %mm4 +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm4 ; X86-NEXT: movq %mm6, %mm2 ; X86-NEXT: paddd %mm4, %mm2 ; X86-NEXT: paddw %mm2, %mm0 ; X86-NEXT: movq %mm5, %mm1 ; X86-NEXT: paddw %mm0, %mm1 -; X86-NEXT: movq 32(%ebp), %mm5 +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm5 ; X86-NEXT: movq %mm1, %mm7 ; X86-NEXT: pmuludq %mm5, %mm7 ; X86-NEXT: paddw %mm4, %mm7 @@ -41,15 +38,14 @@ ; X86-NEXT: movq2dq %mm0, %xmm0 ; X86-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) ; X86-NEXT: fldl {{[0-9]+}}(%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp +; X86-NEXT: addl $20, %esp ; X86-NEXT: retl ; ; X64-LABEL: mmx_zero: ; X64: # %bb.0: ; X64-NEXT: movdq2q %xmm0, %mm0 ; X64-NEXT: movdq2q %xmm1, %mm5 -; X64-NEXT: movq %mm5, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %mm5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %mm0, %mm3 ; X64-NEXT: paddd %mm5, %mm3 ; X64-NEXT: pxor %mm1, %mm1 @@ -73,7 +69,7 @@ ; X64-NEXT: paddw {{\.LCPI.*}}, %mm0 ; X64-NEXT: paddw %mm1, %mm0 ; X64-NEXT: pmuludq %mm7, %mm0 -; X64-NEXT: pmuludq -{{[0-9]+}}(%rsp), %mm0 # 8-byte Folded Reload +; X64-NEXT: pmuludq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload ; X64-NEXT: paddw %mm5, %mm0 ; X64-NEXT: paddw %mm2, %mm0 ; X64-NEXT: movq2dq %mm0, %xmm0 Index: llvm/test/CodeGen/X86/neg_fp.ll =================================================================== --- llvm/test/CodeGen/X86/neg_fp.ll +++ llvm/test/CodeGen/X86/neg_fp.ll @@ -26,12 +26,9 @@ define double @negation_propagation(double* %arg, double %arg1, double %arg2) nounwind { ; CHECK-LABEL: negation_propagation: ; CHECK: # %bb.0: -; CHECK-NEXT: pushl %ebp -; CHECK-NEXT: movl %esp, %ebp -; CHECK-NEXT: andl $-8, %esp ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: divsd 12(%ebp), %xmm0 +; CHECK-NEXT: divsd {{[0-9]+}}(%esp), %xmm0 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; CHECK-NEXT: mulsd %xmm0, %xmm1 ; CHECK-NEXT: movapd %xmm0, %xmm2 @@ -40,8 +37,7 @@ ; CHECK-NEXT: subsd %xmm2, %xmm1 ; CHECK-NEXT: movsd %xmm1, (%esp) ; CHECK-NEXT: fldl (%esp) -; CHECK-NEXT: movl %ebp, %esp -; CHECK-NEXT: popl %ebp +; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: retl %t = fdiv double 1.0, %arg1 %t7 = fmul double %t, %arg2 Index: llvm/test/CodeGen/X86/powi.ll =================================================================== --- llvm/test/CodeGen/X86/powi.ll +++ llvm/test/CodeGen/X86/powi.ll @@ -20,9 +20,6 @@ ; ; X86-SSE-LABEL: pow_wrapper: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %ebp -; X86-SSE-NEXT: movl %esp, %ebp -; X86-SSE-NEXT: andl $-8, %esp ; X86-SSE-NEXT: subl $8, %esp ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE-NEXT: movapd %xmm0, %xmm1 @@ -34,8 +31,7 @@ ; X86-SSE-NEXT: mulsd %xmm0, %xmm1 ; X86-SSE-NEXT: movsd %xmm1, (%esp) ; X86-SSE-NEXT: fldl (%esp) -; X86-SSE-NEXT: movl %ebp, %esp -; X86-SSE-NEXT: popl %ebp +; X86-SSE-NEXT: addl $8, %esp ; X86-SSE-NEXT: retl ; ; X64-LABEL: pow_wrapper: Index: llvm/test/CodeGen/X86/scalar-int-to-fp.ll =================================================================== --- llvm/test/CodeGen/X86/scalar-int-to-fp.ll +++ llvm/test/CodeGen/X86/scalar-int-to-fp.ll @@ -126,15 +126,11 @@ define double @u32_to_d(i32 %a) nounwind { ; AVX512_32-LABEL: u32_to_d: ; AVX512_32: # %bb.0: -; AVX512_32-NEXT: pushl %ebp -; AVX512_32-NEXT: movl %esp, %ebp -; AVX512_32-NEXT: andl $-8, %esp ; AVX512_32-NEXT: subl $8, %esp -; AVX512_32-NEXT: vcvtusi2sdl 8(%ebp), %xmm0, %xmm0 +; AVX512_32-NEXT: vcvtusi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ; 
AVX512_32-NEXT: vmovsd %xmm0, (%esp) ; AVX512_32-NEXT: fldl (%esp) -; AVX512_32-NEXT: movl %ebp, %esp -; AVX512_32-NEXT: popl %ebp +; AVX512_32-NEXT: addl $8, %esp ; AVX512_32-NEXT: retl ; ; AVX512_64-LABEL: u32_to_d: @@ -144,9 +140,6 @@ ; ; SSE2_32-LABEL: u32_to_d: ; SSE2_32: # %bb.0: -; SSE2_32-NEXT: pushl %ebp -; SSE2_32-NEXT: movl %esp, %ebp -; SSE2_32-NEXT: andl $-8, %esp ; SSE2_32-NEXT: subl $8, %esp ; SSE2_32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE2_32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero @@ -154,8 +147,7 @@ ; SSE2_32-NEXT: subsd %xmm0, %xmm1 ; SSE2_32-NEXT: movsd %xmm1, (%esp) ; SSE2_32-NEXT: fldl (%esp) -; SSE2_32-NEXT: movl %ebp, %esp -; SSE2_32-NEXT: popl %ebp +; SSE2_32-NEXT: addl $8, %esp ; SSE2_32-NEXT: retl ; ; SSE2_64-LABEL: u32_to_d: @@ -198,15 +190,11 @@ define double @s32_to_d(i32 %a) nounwind { ; AVX512_32-LABEL: s32_to_d: ; AVX512_32: # %bb.0: -; AVX512_32-NEXT: pushl %ebp -; AVX512_32-NEXT: movl %esp, %ebp -; AVX512_32-NEXT: andl $-8, %esp ; AVX512_32-NEXT: subl $8, %esp -; AVX512_32-NEXT: vcvtsi2sdl 8(%ebp), %xmm0, %xmm0 +; AVX512_32-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX512_32-NEXT: vmovsd %xmm0, (%esp) ; AVX512_32-NEXT: fldl (%esp) -; AVX512_32-NEXT: movl %ebp, %esp -; AVX512_32-NEXT: popl %ebp +; AVX512_32-NEXT: addl $8, %esp ; AVX512_32-NEXT: retl ; ; AVX512_64-LABEL: s32_to_d: @@ -216,15 +204,11 @@ ; ; SSE2_32-LABEL: s32_to_d: ; SSE2_32: # %bb.0: -; SSE2_32-NEXT: pushl %ebp -; SSE2_32-NEXT: movl %esp, %ebp -; SSE2_32-NEXT: andl $-8, %esp ; SSE2_32-NEXT: subl $8, %esp -; SSE2_32-NEXT: cvtsi2sdl 8(%ebp), %xmm0 +; SSE2_32-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 ; SSE2_32-NEXT: movsd %xmm0, (%esp) ; SSE2_32-NEXT: fldl (%esp) -; SSE2_32-NEXT: movl %ebp, %esp -; SSE2_32-NEXT: popl %ebp +; SSE2_32-NEXT: addl $8, %esp ; SSE2_32-NEXT: retl ; ; SSE2_64-LABEL: s32_to_d: @@ -614,16 +598,12 @@ define double @u64_to_d(i64 %a) nounwind { ; AVX512DQVL_32-LABEL: u64_to_d: ; AVX512DQVL_32: # %bb.0: -; AVX512DQVL_32-NEXT: pushl %ebp -; AVX512DQVL_32-NEXT: movl %esp, %ebp -; AVX512DQVL_32-NEXT: andl $-8, %esp ; AVX512DQVL_32-NEXT: subl $8, %esp ; AVX512DQVL_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX512DQVL_32-NEXT: vcvtuqq2pd %ymm0, %ymm0 ; AVX512DQVL_32-NEXT: vmovlps %xmm0, (%esp) ; AVX512DQVL_32-NEXT: fldl (%esp) -; AVX512DQVL_32-NEXT: movl %ebp, %esp -; AVX512DQVL_32-NEXT: popl %ebp +; AVX512DQVL_32-NEXT: addl $8, %esp ; AVX512DQVL_32-NEXT: vzeroupper ; AVX512DQVL_32-NEXT: retl ; @@ -634,24 +614,17 @@ ; ; AVX512DQ_32-LABEL: u64_to_d: ; AVX512DQ_32: # %bb.0: -; AVX512DQ_32-NEXT: pushl %ebp -; AVX512DQ_32-NEXT: movl %esp, %ebp -; AVX512DQ_32-NEXT: andl $-8, %esp ; AVX512DQ_32-NEXT: subl $8, %esp ; AVX512DQ_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX512DQ_32-NEXT: vcvtuqq2pd %zmm0, %zmm0 ; AVX512DQ_32-NEXT: vmovlps %xmm0, (%esp) ; AVX512DQ_32-NEXT: fldl (%esp) -; AVX512DQ_32-NEXT: movl %ebp, %esp -; AVX512DQ_32-NEXT: popl %ebp +; AVX512DQ_32-NEXT: addl $8, %esp ; AVX512DQ_32-NEXT: vzeroupper ; AVX512DQ_32-NEXT: retl ; ; AVX512F_32-LABEL: u64_to_d: ; AVX512F_32: # %bb.0: -; AVX512F_32-NEXT: pushl %ebp -; AVX512F_32-NEXT: movl %esp, %ebp -; AVX512F_32-NEXT: andl $-8, %esp ; AVX512F_32-NEXT: subl $8, %esp ; AVX512F_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX512F_32-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] @@ -660,15 +633,11 @@ ; AVX512F_32-NEXT: vaddsd %xmm0, %xmm1, %xmm0 ; AVX512F_32-NEXT: vmovsd %xmm0, (%esp) ; AVX512F_32-NEXT: fldl (%esp) -; AVX512F_32-NEXT: movl %ebp, %esp -; AVX512F_32-NEXT: popl %ebp +; 
AVX512F_32-NEXT: addl $8, %esp ; AVX512F_32-NEXT: retl ; ; SSE2_32-LABEL: u64_to_d: ; SSE2_32: # %bb.0: -; SSE2_32-NEXT: pushl %ebp -; SSE2_32-NEXT: movl %esp, %ebp -; SSE2_32-NEXT: andl $-8, %esp ; SSE2_32-NEXT: subl $8, %esp ; SSE2_32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE2_32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] @@ -678,8 +647,7 @@ ; SSE2_32-NEXT: addsd %xmm0, %xmm1 ; SSE2_32-NEXT: movsd %xmm1, (%esp) ; SSE2_32-NEXT: fldl (%esp) -; SSE2_32-NEXT: movl %ebp, %esp -; SSE2_32-NEXT: popl %ebp +; SSE2_32-NEXT: addl $8, %esp ; SSE2_32-NEXT: retl ; ; SSE2_64-LABEL: u64_to_d: @@ -736,16 +704,12 @@ define double @u64_to_d_optsize(i64 %a) nounwind optsize { ; AVX512DQVL_32-LABEL: u64_to_d_optsize: ; AVX512DQVL_32: # %bb.0: -; AVX512DQVL_32-NEXT: pushl %ebp -; AVX512DQVL_32-NEXT: movl %esp, %ebp -; AVX512DQVL_32-NEXT: andl $-8, %esp ; AVX512DQVL_32-NEXT: subl $8, %esp ; AVX512DQVL_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX512DQVL_32-NEXT: vcvtuqq2pd %ymm0, %ymm0 ; AVX512DQVL_32-NEXT: vmovlps %xmm0, (%esp) ; AVX512DQVL_32-NEXT: fldl (%esp) -; AVX512DQVL_32-NEXT: movl %ebp, %esp -; AVX512DQVL_32-NEXT: popl %ebp +; AVX512DQVL_32-NEXT: addl $8, %esp ; AVX512DQVL_32-NEXT: vzeroupper ; AVX512DQVL_32-NEXT: retl ; @@ -756,24 +720,17 @@ ; ; AVX512DQ_32-LABEL: u64_to_d_optsize: ; AVX512DQ_32: # %bb.0: -; AVX512DQ_32-NEXT: pushl %ebp -; AVX512DQ_32-NEXT: movl %esp, %ebp -; AVX512DQ_32-NEXT: andl $-8, %esp ; AVX512DQ_32-NEXT: subl $8, %esp ; AVX512DQ_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX512DQ_32-NEXT: vcvtuqq2pd %zmm0, %zmm0 ; AVX512DQ_32-NEXT: vmovlps %xmm0, (%esp) ; AVX512DQ_32-NEXT: fldl (%esp) -; AVX512DQ_32-NEXT: movl %ebp, %esp -; AVX512DQ_32-NEXT: popl %ebp +; AVX512DQ_32-NEXT: addl $8, %esp ; AVX512DQ_32-NEXT: vzeroupper ; AVX512DQ_32-NEXT: retl ; ; AVX512F_32-LABEL: u64_to_d_optsize: ; AVX512F_32: # %bb.0: -; AVX512F_32-NEXT: pushl %ebp -; AVX512F_32-NEXT: movl %esp, %ebp -; AVX512F_32-NEXT: andl $-8, %esp ; AVX512F_32-NEXT: subl $8, %esp ; AVX512F_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX512F_32-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] @@ -781,15 +738,11 @@ ; AVX512F_32-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX512F_32-NEXT: vmovlpd %xmm0, (%esp) ; AVX512F_32-NEXT: fldl (%esp) -; AVX512F_32-NEXT: movl %ebp, %esp -; AVX512F_32-NEXT: popl %ebp +; AVX512F_32-NEXT: addl $8, %esp ; AVX512F_32-NEXT: retl ; ; SSE2_32-LABEL: u64_to_d_optsize: ; SSE2_32: # %bb.0: -; SSE2_32-NEXT: pushl %ebp -; SSE2_32-NEXT: movl %esp, %ebp -; SSE2_32-NEXT: andl $-8, %esp ; SSE2_32-NEXT: subl $8, %esp ; SSE2_32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE2_32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] @@ -799,8 +752,7 @@ ; SSE2_32-NEXT: addsd %xmm0, %xmm1 ; SSE2_32-NEXT: movsd %xmm1, (%esp) ; SSE2_32-NEXT: fldl (%esp) -; SSE2_32-NEXT: movl %ebp, %esp -; SSE2_32-NEXT: popl %ebp +; SSE2_32-NEXT: addl $8, %esp ; SSE2_32-NEXT: retl ; ; SSE2_64-LABEL: u64_to_d_optsize: @@ -857,16 +809,12 @@ define double @s64_to_d(i64 %a) nounwind { ; AVX512DQVL_32-LABEL: s64_to_d: ; AVX512DQVL_32: # %bb.0: -; AVX512DQVL_32-NEXT: pushl %ebp -; AVX512DQVL_32-NEXT: movl %esp, %ebp -; AVX512DQVL_32-NEXT: andl $-8, %esp ; AVX512DQVL_32-NEXT: subl $8, %esp ; AVX512DQVL_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX512DQVL_32-NEXT: vcvtqq2pd %ymm0, %ymm0 ; AVX512DQVL_32-NEXT: vmovlps %xmm0, (%esp) ; AVX512DQVL_32-NEXT: fldl (%esp) -; AVX512DQVL_32-NEXT: movl %ebp, %esp -; AVX512DQVL_32-NEXT: popl %ebp +; AVX512DQVL_32-NEXT: addl $8, %esp ; 
AVX512DQVL_32-NEXT: vzeroupper ; AVX512DQVL_32-NEXT: retl ; @@ -877,16 +825,12 @@ ; ; AVX512DQ_32-LABEL: s64_to_d: ; AVX512DQ_32: # %bb.0: -; AVX512DQ_32-NEXT: pushl %ebp -; AVX512DQ_32-NEXT: movl %esp, %ebp -; AVX512DQ_32-NEXT: andl $-8, %esp ; AVX512DQ_32-NEXT: subl $8, %esp ; AVX512DQ_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX512DQ_32-NEXT: vcvtqq2pd %zmm0, %zmm0 ; AVX512DQ_32-NEXT: vmovlps %xmm0, (%esp) ; AVX512DQ_32-NEXT: fldl (%esp) -; AVX512DQ_32-NEXT: movl %ebp, %esp -; AVX512DQ_32-NEXT: popl %ebp +; AVX512DQ_32-NEXT: addl $8, %esp ; AVX512DQ_32-NEXT: vzeroupper ; AVX512DQ_32-NEXT: retl ; @@ -937,12 +881,9 @@ define double @s64_to_d_2(i64 %a) nounwind { ; AVX512DQVL_32-LABEL: s64_to_d_2: ; AVX512DQVL_32: # %bb.0: -; AVX512DQVL_32-NEXT: pushl %ebp -; AVX512DQVL_32-NEXT: movl %esp, %ebp -; AVX512DQVL_32-NEXT: andl $-8, %esp ; AVX512DQVL_32-NEXT: subl $8, %esp -; AVX512DQVL_32-NEXT: movl 8(%ebp), %eax -; AVX512DQVL_32-NEXT: movl 12(%ebp), %ecx +; AVX512DQVL_32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512DQVL_32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; AVX512DQVL_32-NEXT: addl $5, %eax ; AVX512DQVL_32-NEXT: adcl $0, %ecx ; AVX512DQVL_32-NEXT: vmovd %eax, %xmm0 @@ -950,8 +891,7 @@ ; AVX512DQVL_32-NEXT: vcvtqq2pd %ymm0, %ymm0 ; AVX512DQVL_32-NEXT: vmovlps %xmm0, (%esp) ; AVX512DQVL_32-NEXT: fldl (%esp) -; AVX512DQVL_32-NEXT: movl %ebp, %esp -; AVX512DQVL_32-NEXT: popl %ebp +; AVX512DQVL_32-NEXT: addl $8, %esp ; AVX512DQVL_32-NEXT: vzeroupper ; AVX512DQVL_32-NEXT: retl ; @@ -963,12 +903,9 @@ ; ; AVX512DQ_32-LABEL: s64_to_d_2: ; AVX512DQ_32: # %bb.0: -; AVX512DQ_32-NEXT: pushl %ebp -; AVX512DQ_32-NEXT: movl %esp, %ebp -; AVX512DQ_32-NEXT: andl $-8, %esp ; AVX512DQ_32-NEXT: subl $8, %esp -; AVX512DQ_32-NEXT: movl 8(%ebp), %eax -; AVX512DQ_32-NEXT: movl 12(%ebp), %ecx +; AVX512DQ_32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512DQ_32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; AVX512DQ_32-NEXT: addl $5, %eax ; AVX512DQ_32-NEXT: adcl $0, %ecx ; AVX512DQ_32-NEXT: vmovd %eax, %xmm0 @@ -976,8 +913,7 @@ ; AVX512DQ_32-NEXT: vcvtqq2pd %zmm0, %zmm0 ; AVX512DQ_32-NEXT: vmovlps %xmm0, (%esp) ; AVX512DQ_32-NEXT: fldl (%esp) -; AVX512DQ_32-NEXT: movl %ebp, %esp -; AVX512DQ_32-NEXT: popl %ebp +; AVX512DQ_32-NEXT: addl $8, %esp ; AVX512DQ_32-NEXT: vzeroupper ; AVX512DQ_32-NEXT: retl ; Index: llvm/test/CodeGen/X86/sse-fcopysign.ll =================================================================== --- llvm/test/CodeGen/X86/sse-fcopysign.ll +++ llvm/test/CodeGen/X86/sse-fcopysign.ll @@ -87,12 +87,9 @@ define double @int2(double %a, float %b, float %c) nounwind { ; X32-LABEL: int2: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: andl $-8, %esp ; X32-NEXT: subl $8, %esp ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32-NEXT: addss 20(%ebp), %xmm0 +; X32-NEXT: addss {{[0-9]+}}(%esp), %xmm0 ; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; X32-NEXT: andps {{\.LCPI.*}}, %xmm1 ; X32-NEXT: cvtss2sd %xmm0, %xmm0 @@ -100,8 +97,7 @@ ; X32-NEXT: orps %xmm1, %xmm0 ; X32-NEXT: movlps %xmm0, (%esp) ; X32-NEXT: fldl (%esp) -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp +; X32-NEXT: addl $8, %esp ; X32-NEXT: retl ; ; X64-LABEL: int2: Index: llvm/test/CodeGen/X86/sse-load-ret.ll =================================================================== --- llvm/test/CodeGen/X86/sse-load-ret.ll +++ llvm/test/CodeGen/X86/sse-load-ret.ll @@ -23,14 +23,9 @@ define double @test3(i1 %B) { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: -; CHECK-NEXT: pushl %ebp -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; 
CHECK-NEXT: .cfi_offset %ebp, -8 -; CHECK-NEXT: movl %esp, %ebp -; CHECK-NEXT: .cfi_def_cfa_register %ebp -; CHECK-NEXT: andl $-8, %esp ; CHECK-NEXT: subl $8, %esp -; CHECK-NEXT: testb $1, 8(%ebp) +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: jne .LBB2_1 ; CHECK-NEXT: # %bb.2: ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero @@ -40,9 +35,8 @@ ; CHECK-NEXT: .LBB2_3: ; CHECK-NEXT: movsd %xmm0, (%esp) ; CHECK-NEXT: fldl (%esp) -; CHECK-NEXT: movl %ebp, %esp -; CHECK-NEXT: popl %ebp -; CHECK-NEXT: .cfi_def_cfa %esp, 4 +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 4 ; CHECK-NEXT: retl %C = select i1 %B, double 123.412, double 523.01123123 ret double %C Index: llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll =================================================================== --- llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll +++ llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll @@ -1460,38 +1460,26 @@ define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind { ; X86-SSE-LABEL: test_mm_cvtsd_f64: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %ebp # encoding: [0x55] -; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] -; X86-SSE-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] ; X86-SSE-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] ; X86-SSE-NEXT: movlps %xmm0, (%esp) # encoding: [0x0f,0x13,0x04,0x24] ; X86-SSE-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] -; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] -; X86-SSE-NEXT: popl %ebp # encoding: [0x5d] +; X86-SSE-NEXT: addl $8, %esp # encoding: [0x83,0xc4,0x08] ; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; X86-AVX1-LABEL: test_mm_cvtsd_f64: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: pushl %ebp # encoding: [0x55] -; X86-AVX1-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] -; X86-AVX1-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] ; X86-AVX1-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] ; X86-AVX1-NEXT: vmovlps %xmm0, (%esp) # encoding: [0xc5,0xf8,0x13,0x04,0x24] ; X86-AVX1-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] -; X86-AVX1-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] -; X86-AVX1-NEXT: popl %ebp # encoding: [0x5d] +; X86-AVX1-NEXT: addl $8, %esp # encoding: [0x83,0xc4,0x08] ; X86-AVX1-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512-LABEL: test_mm_cvtsd_f64: ; X86-AVX512: # %bb.0: -; X86-AVX512-NEXT: pushl %ebp # encoding: [0x55] -; X86-AVX512-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] -; X86-AVX512-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] ; X86-AVX512-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] ; X86-AVX512-NEXT: vmovlps %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x04,0x24] ; X86-AVX512-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] -; X86-AVX512-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] -; X86-AVX512-NEXT: popl %ebp # encoding: [0x5d] +; X86-AVX512-NEXT: addl $8, %esp # encoding: [0x83,0xc4,0x08] ; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm_cvtsd_f64: @@ -4996,47 +4984,35 @@ define double @test_mm_sqrt_sd_scalar(double %a0) nounwind { ; X86-SSE-LABEL: test_mm_sqrt_sd_scalar: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %ebp # encoding: [0x55] -; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] -; X86-SSE-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] ; X86-SSE-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] -; X86-SSE-NEXT: movsd 8(%ebp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x45,0x08] +; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x0c] ; X86-SSE-NEXT: # 
xmm0 = mem[0],zero ; X86-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0] ; X86-SSE-NEXT: movsd %xmm0, (%esp) # encoding: [0xf2,0x0f,0x11,0x04,0x24] ; X86-SSE-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] -; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] -; X86-SSE-NEXT: popl %ebp # encoding: [0x5d] +; X86-SSE-NEXT: addl $8, %esp # encoding: [0x83,0xc4,0x08] ; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; X86-AVX1-LABEL: test_mm_sqrt_sd_scalar: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: pushl %ebp # encoding: [0x55] -; X86-AVX1-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] -; X86-AVX1-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] ; X86-AVX1-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] -; X86-AVX1-NEXT: vmovsd 8(%ebp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x45,0x08] +; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c] ; X86-AVX1-NEXT: # xmm0 = mem[0],zero ; X86-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0] ; X86-AVX1-NEXT: vmovsd %xmm0, (%esp) # encoding: [0xc5,0xfb,0x11,0x04,0x24] ; X86-AVX1-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] -; X86-AVX1-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] -; X86-AVX1-NEXT: popl %ebp # encoding: [0x5d] +; X86-AVX1-NEXT: addl $8, %esp # encoding: [0x83,0xc4,0x08] ; X86-AVX1-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512-LABEL: test_mm_sqrt_sd_scalar: ; X86-AVX512: # %bb.0: -; X86-AVX512-NEXT: pushl %ebp # encoding: [0x55] -; X86-AVX512-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] -; X86-AVX512-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] ; X86-AVX512-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] -; X86-AVX512-NEXT: vmovsd 8(%ebp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x45,0x08] +; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c] ; X86-AVX512-NEXT: # xmm0 = mem[0],zero ; X86-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] ; X86-AVX512-NEXT: vmovsd %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x24] ; X86-AVX512-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] -; X86-AVX512-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] -; X86-AVX512-NEXT: popl %ebp # encoding: [0x5d] +; X86-AVX512-NEXT: addl $8, %esp # encoding: [0x83,0xc4,0x08] ; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_sqrt_sd_scalar: Index: llvm/test/CodeGen/X86/var-permute-128.ll =================================================================== --- llvm/test/CodeGen/X86/var-permute-128.ll +++ llvm/test/CodeGen/X86/var-permute-128.ll @@ -643,116 +643,112 @@ ; SSE3-LABEL: var_shuffle_v16i8_from_v32i8_v16i8: ; SSE3: # %bb.0: ; SSE3-NEXT: pushq %rbp -; SSE3-NEXT: movq %rsp, %rbp ; SSE3-NEXT: pushq %r15 ; SSE3-NEXT: pushq %r14 ; SSE3-NEXT: pushq %r13 ; SSE3-NEXT: pushq %r12 ; SSE3-NEXT: pushq %rbx -; SSE3-NEXT: andq $-32, %rsp -; SSE3-NEXT: subq $608, %rsp # imm = 0x260 -; SSE3-NEXT: movaps %xmm2, {{[0-9]+}}(%rsp) +; SSE3-NEXT: subq $424, %rsp # imm = 0x1A8 +; SSE3-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) ; SSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; SSE3-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE3-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 
8-byte Spill ; SSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE3-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; SSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d -; SSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d +; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r10d ; SSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d +; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r11d ; SSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d +; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r14d ; SSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d +; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r15d ; SSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d +; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r12d ; SSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d +; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r13d ; SSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r9d ; SSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %esi +; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ebx ; SSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edi ; SSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; SSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %r9d -; SSE3-NEXT: andl $31, %r9d -; SSE3-NEXT: movzbl 64(%rsp,%r9), %ebx -; SSE3-NEXT: movd %ebx, %xmm8 -; SSE3-NEXT: andl $31, %eax -; SSE3-NEXT: movzbl 96(%rsp,%rax), %eax -; SSE3-NEXT: movd %eax, %xmm15 +; SSE3-NEXT: movaps %xmm0, (%rsp) +; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; SSE3-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSE3-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ebp +; SSE3-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r8d +; SSE3-NEXT: andl $31, %r8d +; SSE3-NEXT: movzbl -96(%rsp,%r8), %esi +; SSE3-NEXT: movd %esi, %xmm8 +; SSE3-NEXT: andl $31, %ebp +; SSE3-NEXT: movzbl -64(%rsp,%rbp), %esi +; SSE3-NEXT: movd %esi, %xmm15 ; SSE3-NEXT: andl $31, %edx -; SSE3-NEXT: movzbl 128(%rsp,%rdx), %eax -; SSE3-NEXT: movd %eax, %xmm9 +; SSE3-NEXT: movzbl -32(%rsp,%rdx), %edx +; SSE3-NEXT: movd %edx, %xmm9 ; SSE3-NEXT: andl $31, %ecx -; SSE3-NEXT: 
movzbl 160(%rsp,%rcx), %eax -; SSE3-NEXT: movd %eax, %xmm3 -; SSE3-NEXT: andl $31, %esi -; SSE3-NEXT: movzbl 192(%rsp,%rsi), %eax +; SSE3-NEXT: movzbl (%rsp,%rcx), %ecx +; SSE3-NEXT: movd %ecx, %xmm3 +; SSE3-NEXT: andl $31, %eax +; SSE3-NEXT: movzbl 32(%rsp,%rax), %eax ; SSE3-NEXT: movd %eax, %xmm10 ; SSE3-NEXT: andl $31, %edi -; SSE3-NEXT: movzbl 224(%rsp,%rdi), %eax +; SSE3-NEXT: movzbl 64(%rsp,%rdi), %eax ; SSE3-NEXT: movd %eax, %xmm7 -; SSE3-NEXT: andl $31, %r8d -; SSE3-NEXT: movzbl 256(%rsp,%r8), %eax +; SSE3-NEXT: andl $31, %ebx +; SSE3-NEXT: movzbl 96(%rsp,%rbx), %eax ; SSE3-NEXT: movd %eax, %xmm11 -; SSE3-NEXT: andl $31, %r10d -; SSE3-NEXT: movzbl 288(%rsp,%r10), %eax +; SSE3-NEXT: andl $31, %r9d +; SSE3-NEXT: movzbl 128(%rsp,%r9), %eax ; SSE3-NEXT: movd %eax, %xmm6 ; SSE3-NEXT: andl $31, %r13d -; SSE3-NEXT: movzbl 320(%rsp,%r13), %eax +; SSE3-NEXT: movzbl 160(%rsp,%r13), %eax ; SSE3-NEXT: movd %eax, %xmm12 ; SSE3-NEXT: andl $31, %r12d -; SSE3-NEXT: movzbl 352(%rsp,%r12), %eax +; SSE3-NEXT: movzbl 192(%rsp,%r12), %eax ; SSE3-NEXT: movd %eax, %xmm5 ; SSE3-NEXT: andl $31, %r15d -; SSE3-NEXT: movzbl 384(%rsp,%r15), %eax +; SSE3-NEXT: movzbl 224(%rsp,%r15), %eax ; SSE3-NEXT: movd %eax, %xmm13 ; SSE3-NEXT: andl $31, %r14d -; SSE3-NEXT: movzbl 416(%rsp,%r14), %eax +; SSE3-NEXT: movzbl 256(%rsp,%r14), %eax ; SSE3-NEXT: movd %eax, %xmm4 ; SSE3-NEXT: andl $31, %r11d -; SSE3-NEXT: movzbl 448(%rsp,%r11), %eax +; SSE3-NEXT: movzbl 288(%rsp,%r11), %eax ; SSE3-NEXT: movd %eax, %xmm14 -; SSE3-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSE3-NEXT: andl $31, %eax -; SSE3-NEXT: movzbl 480(%rsp,%rax), %eax +; SSE3-NEXT: andl $31, %r10d +; SSE3-NEXT: movzbl 320(%rsp,%r10), %eax ; SSE3-NEXT: movd %eax, %xmm1 ; SSE3-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; SSE3-NEXT: andl $31, %eax -; SSE3-NEXT: movzbl 512(%rsp,%rax), %eax +; SSE3-NEXT: movzbl 352(%rsp,%rax), %eax ; SSE3-NEXT: movd %eax, %xmm2 ; SSE3-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; SSE3-NEXT: andl $31, %eax -; SSE3-NEXT: movzbl 544(%rsp,%rax), %eax +; SSE3-NEXT: movzbl 384(%rsp,%rax), %eax ; SSE3-NEXT: movd %eax, %xmm0 ; SSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7] ; SSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7] @@ -769,7 +765,7 @@ ; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] ; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0] -; SSE3-NEXT: leaq -40(%rbp), %rsp +; SSE3-NEXT: addq $424, %rsp # imm = 0x1A8 ; SSE3-NEXT: popq %rbx ; SSE3-NEXT: popq %r12 ; SSE3-NEXT: popq %r13 @@ -781,116 +777,112 @@ ; SSSE3-LABEL: var_shuffle_v16i8_from_v32i8_v16i8: ; SSSE3: # %bb.0: ; SSSE3-NEXT: pushq %rbp -; SSSE3-NEXT: movq %rsp, %rbp ; SSSE3-NEXT: pushq %r15 ; SSSE3-NEXT: pushq %r14 ; SSSE3-NEXT: pushq %r13 ; SSSE3-NEXT: pushq %r12 ; SSSE3-NEXT: pushq %rbx -; SSSE3-NEXT: andq $-32, %rsp -; SSSE3-NEXT: subq $608, %rsp # imm = 0x260 -; SSSE3-NEXT: movaps %xmm2, {{[0-9]+}}(%rsp) +; SSSE3-NEXT: subq $424, %rsp # imm = 0x1A8 +; SSSE3-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) ; SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; SSSE3-NEXT: movzbl 
-{{[0-9]+}}(%rsp), %eax ; SSSE3-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSSE3-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; SSSE3-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d -; SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d -; SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d +; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r10d ; SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d +; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r11d ; SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d +; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r14d ; SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d +; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r15d ; SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d +; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r12d ; SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r13d ; SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %esi +; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r9d ; SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ebx ; SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %edx +; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edi ; SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %r9d -; SSSE3-NEXT: andl $31, %r9d -; SSSE3-NEXT: movzbl 64(%rsp,%r9), %ebx -; SSSE3-NEXT: movd %ebx, %xmm8 -; SSSE3-NEXT: andl $31, %eax -; SSSE3-NEXT: movzbl 96(%rsp,%rax), %eax -; SSSE3-NEXT: movd %eax, %xmm15 +; SSSE3-NEXT: movaps %xmm0, (%rsp) +; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; SSSE3-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; SSSE3-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ebp +; SSSE3-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r8d +; SSSE3-NEXT: andl $31, %r8d +; SSSE3-NEXT: movzbl -96(%rsp,%r8), %esi +; 
SSSE3-NEXT: movd %esi, %xmm8 +; SSSE3-NEXT: andl $31, %ebp +; SSSE3-NEXT: movzbl -64(%rsp,%rbp), %esi +; SSSE3-NEXT: movd %esi, %xmm15 ; SSSE3-NEXT: andl $31, %edx -; SSSE3-NEXT: movzbl 128(%rsp,%rdx), %eax -; SSSE3-NEXT: movd %eax, %xmm9 +; SSSE3-NEXT: movzbl -32(%rsp,%rdx), %edx +; SSSE3-NEXT: movd %edx, %xmm9 ; SSSE3-NEXT: andl $31, %ecx -; SSSE3-NEXT: movzbl 160(%rsp,%rcx), %eax -; SSSE3-NEXT: movd %eax, %xmm3 -; SSSE3-NEXT: andl $31, %esi -; SSSE3-NEXT: movzbl 192(%rsp,%rsi), %eax +; SSSE3-NEXT: movzbl (%rsp,%rcx), %ecx +; SSSE3-NEXT: movd %ecx, %xmm3 +; SSSE3-NEXT: andl $31, %eax +; SSSE3-NEXT: movzbl 32(%rsp,%rax), %eax ; SSSE3-NEXT: movd %eax, %xmm10 ; SSSE3-NEXT: andl $31, %edi -; SSSE3-NEXT: movzbl 224(%rsp,%rdi), %eax +; SSSE3-NEXT: movzbl 64(%rsp,%rdi), %eax ; SSSE3-NEXT: movd %eax, %xmm7 -; SSSE3-NEXT: andl $31, %r8d -; SSSE3-NEXT: movzbl 256(%rsp,%r8), %eax +; SSSE3-NEXT: andl $31, %ebx +; SSSE3-NEXT: movzbl 96(%rsp,%rbx), %eax ; SSSE3-NEXT: movd %eax, %xmm11 -; SSSE3-NEXT: andl $31, %r10d -; SSSE3-NEXT: movzbl 288(%rsp,%r10), %eax +; SSSE3-NEXT: andl $31, %r9d +; SSSE3-NEXT: movzbl 128(%rsp,%r9), %eax ; SSSE3-NEXT: movd %eax, %xmm6 ; SSSE3-NEXT: andl $31, %r13d -; SSSE3-NEXT: movzbl 320(%rsp,%r13), %eax +; SSSE3-NEXT: movzbl 160(%rsp,%r13), %eax ; SSSE3-NEXT: movd %eax, %xmm12 ; SSSE3-NEXT: andl $31, %r12d -; SSSE3-NEXT: movzbl 352(%rsp,%r12), %eax +; SSSE3-NEXT: movzbl 192(%rsp,%r12), %eax ; SSSE3-NEXT: movd %eax, %xmm5 ; SSSE3-NEXT: andl $31, %r15d -; SSSE3-NEXT: movzbl 384(%rsp,%r15), %eax +; SSSE3-NEXT: movzbl 224(%rsp,%r15), %eax ; SSSE3-NEXT: movd %eax, %xmm13 ; SSSE3-NEXT: andl $31, %r14d -; SSSE3-NEXT: movzbl 416(%rsp,%r14), %eax +; SSSE3-NEXT: movzbl 256(%rsp,%r14), %eax ; SSSE3-NEXT: movd %eax, %xmm4 ; SSSE3-NEXT: andl $31, %r11d -; SSSE3-NEXT: movzbl 448(%rsp,%r11), %eax +; SSSE3-NEXT: movzbl 288(%rsp,%r11), %eax ; SSSE3-NEXT: movd %eax, %xmm14 -; SSSE3-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; SSSE3-NEXT: andl $31, %eax -; SSSE3-NEXT: movzbl 480(%rsp,%rax), %eax +; SSSE3-NEXT: andl $31, %r10d +; SSSE3-NEXT: movzbl 320(%rsp,%r10), %eax ; SSSE3-NEXT: movd %eax, %xmm1 ; SSSE3-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; SSSE3-NEXT: andl $31, %eax -; SSSE3-NEXT: movzbl 512(%rsp,%rax), %eax +; SSSE3-NEXT: movzbl 352(%rsp,%rax), %eax ; SSSE3-NEXT: movd %eax, %xmm2 ; SSSE3-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; SSSE3-NEXT: andl $31, %eax -; SSSE3-NEXT: movzbl 544(%rsp,%rax), %eax +; SSSE3-NEXT: movzbl 384(%rsp,%rax), %eax ; SSSE3-NEXT: movd %eax, %xmm0 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7] ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7] @@ -907,7 +899,7 @@ ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0] -; SSSE3-NEXT: leaq -40(%rbp), %rsp +; SSSE3-NEXT: addq $424, %rsp # imm = 0x1A8 ; SSSE3-NEXT: popq %rbx ; SSSE3-NEXT: popq %r12 ; SSSE3-NEXT: popq %r13 @@ -918,10 +910,7 @@ ; ; SSE41-LABEL: var_shuffle_v16i8_from_v32i8_v16i8: ; SSE41: # %bb.0: -; SSE41-NEXT: pushq %rbp -; SSE41-NEXT: movq %rsp, %rbp -; SSE41-NEXT: andq $-32, %rsp -; SSE41-NEXT: subq $544, %rsp # imm = 0x220 +; 
SSE41-NEXT: subq $392, %rsp # imm = 0x188 ; SSE41-NEXT: movd %xmm2, %eax ; SSE41-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSE41-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) @@ -947,64 +936,63 @@ ; SSE41-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSE41-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; SSE41-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; SSE41-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE41-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; SSE41-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE41-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; SSE41-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE41-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; SSE41-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE41-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSE41-NEXT: movaps %xmm0, (%rsp) -; SSE41-NEXT: movzbl 480(%rsp,%rax), %eax +; SSE41-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE41-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE41-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE41-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE41-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE41-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE41-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; SSE41-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; SSE41-NEXT: movzbl 352(%rsp,%rax), %eax ; SSE41-NEXT: movd %eax, %xmm0 ; SSE41-NEXT: pextrb $1, %xmm2, %eax ; SSE41-NEXT: andl $31, %eax -; SSE41-NEXT: pinsrb $1, 448(%rsp,%rax), %xmm0 +; SSE41-NEXT: pinsrb $1, 320(%rsp,%rax), %xmm0 ; SSE41-NEXT: pextrb $2, %xmm2, %eax ; SSE41-NEXT: andl $31, %eax -; SSE41-NEXT: pinsrb $2, 416(%rsp,%rax), %xmm0 +; SSE41-NEXT: pinsrb $2, 288(%rsp,%rax), %xmm0 ; SSE41-NEXT: pextrb $3, %xmm2, %eax ; SSE41-NEXT: andl $31, %eax -; SSE41-NEXT: pinsrb $3, 384(%rsp,%rax), %xmm0 +; SSE41-NEXT: pinsrb $3, 256(%rsp,%rax), %xmm0 ; SSE41-NEXT: pextrb $4, %xmm2, %eax ; SSE41-NEXT: andl $31, %eax -; SSE41-NEXT: pinsrb $4, 352(%rsp,%rax), %xmm0 +; SSE41-NEXT: pinsrb $4, 224(%rsp,%rax), %xmm0 ; SSE41-NEXT: pextrb $5, %xmm2, %eax ; SSE41-NEXT: andl $31, %eax -; SSE41-NEXT: pinsrb $5, 320(%rsp,%rax), %xmm0 +; SSE41-NEXT: pinsrb $5, 192(%rsp,%rax), %xmm0 ; SSE41-NEXT: pextrb $6, %xmm2, %eax ; SSE41-NEXT: andl $31, %eax -; SSE41-NEXT: pinsrb $6, 288(%rsp,%rax), %xmm0 +; SSE41-NEXT: pinsrb $6, 160(%rsp,%rax), %xmm0 ; SSE41-NEXT: pextrb $7, %xmm2, %eax ; SSE41-NEXT: andl $31, %eax -; SSE41-NEXT: pinsrb $7, 256(%rsp,%rax), %xmm0 +; SSE41-NEXT: pinsrb $7, 128(%rsp,%rax), %xmm0 ; SSE41-NEXT: pextrb $8, %xmm2, %eax ; SSE41-NEXT: andl $31, %eax -; SSE41-NEXT: pinsrb $8, 224(%rsp,%rax), %xmm0 +; SSE41-NEXT: pinsrb $8, 96(%rsp,%rax), %xmm0 ; SSE41-NEXT: pextrb $9, %xmm2, %eax ; SSE41-NEXT: andl $31, %eax -; SSE41-NEXT: pinsrb $9, 192(%rsp,%rax), %xmm0 +; SSE41-NEXT: pinsrb $9, 64(%rsp,%rax), %xmm0 ; SSE41-NEXT: pextrb $10, %xmm2, %eax ; SSE41-NEXT: andl $31, %eax -; SSE41-NEXT: pinsrb $10, 160(%rsp,%rax), %xmm0 +; SSE41-NEXT: pinsrb $10, 32(%rsp,%rax), %xmm0 ; SSE41-NEXT: pextrb $11, %xmm2, %eax ; SSE41-NEXT: andl $31, %eax -; SSE41-NEXT: pinsrb $11, 128(%rsp,%rax), %xmm0 +; SSE41-NEXT: pinsrb $11, (%rsp,%rax), %xmm0 ; SSE41-NEXT: pextrb $12, %xmm2, %eax ; SSE41-NEXT: andl $31, %eax -; SSE41-NEXT: pinsrb $12, 96(%rsp,%rax), %xmm0 +; SSE41-NEXT: pinsrb $12, -32(%rsp,%rax), %xmm0 ; SSE41-NEXT: pextrb $13, %xmm2, %eax ; SSE41-NEXT: andl $31, %eax -; SSE41-NEXT: pinsrb $13, 64(%rsp,%rax), %xmm0 +; SSE41-NEXT: pinsrb $13, -64(%rsp,%rax), %xmm0 ; SSE41-NEXT: pextrb $14, %xmm2, %eax ; SSE41-NEXT: andl $31, %eax -; SSE41-NEXT: pinsrb $14, 32(%rsp,%rax), %xmm0 +; SSE41-NEXT: pinsrb $14, -96(%rsp,%rax), %xmm0 ; SSE41-NEXT: pextrb $15, %xmm2, %eax ; SSE41-NEXT: andl $31, 
%eax -; SSE41-NEXT: pinsrb $15, (%rsp,%rax), %xmm0 -; SSE41-NEXT: movq %rbp, %rsp -; SSE41-NEXT: popq %rbp +; SSE41-NEXT: pinsrb $15, -128(%rsp,%rax), %xmm0 +; SSE41-NEXT: addq $392, %rsp # imm = 0x188 ; SSE41-NEXT: retq ; ; XOP-LABEL: var_shuffle_v16i8_from_v32i8_v16i8: Index: llvm/test/CodeGen/X86/vec-strict-128.ll =================================================================== --- llvm/test/CodeGen/X86/vec-strict-128.ll +++ llvm/test/CodeGen/X86/vec-strict-128.ll @@ -342,25 +342,20 @@ define <2 x double> @f14(<2 x double> %a, <2 x double> %b, <2 x double> %c) #0 { ; SSE-X86-LABEL: f14: ; SSE-X86: # %bb.0: -; SSE-X86-NEXT: pushl %ebp -; SSE-X86-NEXT: .cfi_def_cfa_offset 8 -; SSE-X86-NEXT: .cfi_offset %ebp, -8 -; SSE-X86-NEXT: movl %esp, %ebp -; SSE-X86-NEXT: .cfi_def_cfa_register %ebp -; SSE-X86-NEXT: andl $-16, %esp -; SSE-X86-NEXT: subl $112, %esp -; SSE-X86-NEXT: movaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; SSE-X86-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; SSE-X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; SSE-X86-NEXT: subl $108, %esp +; SSE-X86-NEXT: .cfi_def_cfa_offset 112 +; SSE-X86-NEXT: movups %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; SSE-X86-NEXT: movups %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; SSE-X86-NEXT: movups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; SSE-X86-NEXT: movlps %xmm2, {{[0-9]+}}(%esp) ; SSE-X86-NEXT: movlps %xmm1, {{[0-9]+}}(%esp) ; SSE-X86-NEXT: movlps %xmm0, (%esp) ; SSE-X86-NEXT: calll fma -; SSE-X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-X86-NEXT: movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload ; SSE-X86-NEXT: movhps %xmm0, {{[0-9]+}}(%esp) -; SSE-X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-X86-NEXT: movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload ; SSE-X86-NEXT: movhps %xmm0, {{[0-9]+}}(%esp) -; SSE-X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-X86-NEXT: movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload ; SSE-X86-NEXT: movhps %xmm0, (%esp) ; SSE-X86-NEXT: fstpl {{[0-9]+}}(%esp) ; SSE-X86-NEXT: wait @@ -369,9 +364,8 @@ ; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE-X86-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] -; SSE-X86-NEXT: movl %ebp, %esp -; SSE-X86-NEXT: popl %ebp -; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: addl $108, %esp +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE-X86-NEXT: retl ; ; SSE-X64-LABEL: f14: Index: llvm/test/CodeGen/X86/vec_insert-4.ll =================================================================== --- llvm/test/CodeGen/X86/vec_insert-4.ll +++ llvm/test/CodeGen/X86/vec_insert-4.ll @@ -5,36 +5,26 @@ define <8 x float> @f(<8 x float> %a, i32 %b) nounwind { ; X32-LABEL: f: ; X32: ## %bb.0: ## %entry -; X32-NEXT: pushl %ebp -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: andl $-32, %esp -; X32-NEXT: subl $64, %esp -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: subl $44, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: andl $7, %eax ; X32-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) ; X32-NEXT: movaps %xmm0, (%esp) ; X32-NEXT: movl $1084227584, (%esp,%eax,4) ## imm = 0x40A00000 ; X32-NEXT: movaps (%esp), %xmm0 ; X32-NEXT: movaps {{[0-9]+}}(%esp), %xmm1 -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp +; X32-NEXT: addl $44, %esp ; X32-NEXT: retl ; ; X64-LABEL: f: ; X64: ## %bb.0: ## %entry -; X64-NEXT: pushq %rbp -; X64-NEXT: movq %rsp, %rbp -; X64-NEXT: andq $-32, %rsp -; X64-NEXT: subq $64, %rsp ; 
X64-NEXT: ## kill: def $edi killed $edi def $rdi -; X64-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; X64-NEXT: movaps %xmm0, (%rsp) +; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; X64-NEXT: andl $7, %edi -; X64-NEXT: movl $1084227584, (%rsp,%rdi,4) ## imm = 0x40A00000 -; X64-NEXT: movaps (%rsp), %xmm0 -; X64-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; X64-NEXT: movq %rbp, %rsp -; X64-NEXT: popq %rbp +; X64-NEXT: movl $1084227584, -40(%rsp,%rdi,4) ## imm = 0x40A00000 +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; X64-NEXT: retq entry: %vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b Index: llvm/test/CodeGen/X86/vector-extend-inreg.ll =================================================================== --- llvm/test/CodeGen/X86/vector-extend-inreg.ll +++ llvm/test/CodeGen/X86/vector-extend-inreg.ll @@ -9,8 +9,8 @@ ; X32-SSE: # %bb.0: ; X32-SSE-NEXT: pushl %ebp ; X32-SSE-NEXT: movl %esp, %ebp -; X32-SSE-NEXT: andl $-128, %esp -; X32-SSE-NEXT: subl $384, %esp # imm = 0x180 +; X32-SSE-NEXT: andl $-16, %esp +; X32-SSE-NEXT: subl $272, %esp # imm = 0x110 ; X32-SSE-NEXT: movl 88(%ebp), %ecx ; X32-SSE-NEXT: movdqa 72(%ebp), %xmm0 ; X32-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero @@ -43,33 +43,29 @@ ; ; X64-SSE-LABEL: extract_any_extend_vector_inreg_v16i64: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pushq %rbp -; X64-SSE-NEXT: movq %rsp, %rbp -; X64-SSE-NEXT: andq $-128, %rsp -; X64-SSE-NEXT: subq $256, %rsp # imm = 0x100 +; X64-SSE-NEXT: pushq %rax ; X64-SSE-NEXT: # kill: def $edi killed $edi def $rdi ; X64-SSE-NEXT: psrldq {{.*#+}} xmm7 = xmm7[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero ; X64-SSE-NEXT: xorps %xmm0, %xmm0 -; X64-SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movaps %xmm0, (%rsp) -; X64-SSE-NEXT: movdqa %xmm7, {{[0-9]+}}(%rsp) +; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-SSE-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) ; X64-SSE-NEXT: andl $15, %edi -; X64-SSE-NEXT: movq (%rsp,%rdi,8), %rax -; X64-SSE-NEXT: movq %rbp, %rsp -; X64-SSE-NEXT: popq %rbp +; X64-SSE-NEXT: movq -128(%rsp,%rdi,8), %rax +; X64-SSE-NEXT: popq %rcx ; X64-SSE-NEXT: retq ; ; X32-AVX-LABEL: extract_any_extend_vector_inreg_v16i64: ; X32-AVX: # %bb.0: ; X32-AVX-NEXT: pushl %ebp ; X32-AVX-NEXT: movl %esp, %ebp -; X32-AVX-NEXT: andl $-128, %esp -; X32-AVX-NEXT: subl $384, %esp # imm = 0x180 +; X32-AVX-NEXT: andl $-32, %esp +; X32-AVX-NEXT: subl $288, %esp # imm = 0x120 ; X32-AVX-NEXT: movl 40(%ebp), %ecx ; X32-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X32-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 @@ -96,8 +92,8 @@ ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: pushq %rbp ; X64-AVX-NEXT: movq %rsp, %rbp -; X64-AVX-NEXT: andq $-128, %rsp -; X64-AVX-NEXT: subq $256, %rsp # imm = 0x100 +; X64-AVX-NEXT: andq $-32, %rsp +; X64-AVX-NEXT: subq $160, %rsp ; X64-AVX-NEXT: # kill: def $edi killed $edi def $rdi ; X64-AVX-NEXT: vpermq 
{{.*#+}} ymm0 = ymm3[3,1,2,3] ; X64-AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero