Index: lib/Target/X86/X86RegisterInfo.cpp
===================================================================
--- lib/Target/X86/X86RegisterInfo.cpp
+++ lib/Target/X86/X86RegisterInfo.cpp
@@ -595,6 +595,47 @@
   llvm_unreachable("Unused function on X86. Otherwise need a test case.");
 }
 
+/// tryOptimizeLEAtoMOV - Try to replace a LEA that only copies its base
+/// register, i.e. 'lea (%esp), %ebx', with the equivalent 'mov %esp, %ebx'.
+///
+/// The rewrite is attempted only for LEA32r, LEA64r and LEA64_32r whose
+/// operand 2 is the immediate 1, whose operands 3 and 5 are X86::NoRegister and
+/// whose operand 4 is the immediate 0.
+///
+/// \param II Iterator to the candidate instruction; that instruction is erased
+///           when the rewrite succeeds.
+/// \returns true if the LEA was replaced by a register copy, false if the
+///          instruction was left untouched.
+// TODO: In this case we should be really trying first to entirely eliminate
+// this instruction which is a plain copy.
+static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
+  MachineInstr &MI = *II;
+  unsigned Opc = II->getOpcode();
+  // Check if this is a LEA of the form 'lea (%esp), %ebx'
+  if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
+      MI.getOperand(2).getImm() != 1 ||
+      MI.getOperand(3).getReg() != X86::NoRegister ||
+      MI.getOperand(4).getImm() != 0 ||
+      MI.getOperand(5).getReg() != X86::NoRegister)
+    return false;
+  unsigned BasePtr = MI.getOperand(1).getReg();
+  // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
+  // be replaced with a 32-bit operand MOV which will zero extend the upper
+  // 32-bits of the super register.
+  if (Opc == X86::LEA64_32r)
+    BasePtr = getX86SubSuperRegister(BasePtr, 32);
+  unsigned NewDestReg = MI.getOperand(0).getReg();
+  // Ask for the X86Subtarget explicitly so that getInstrInfo() yields the
+  // X86InstrInfo this pointer is declared as.
+  const X86InstrInfo *TII =
+      MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
+  // Emit the register copy, then drop the LEA it replaces.
+  TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
+                   MI.getOperand(1).isKill());
+  MI.eraseFromParent();
+  return true;
+}
+
 void
 X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                      int SPAdj, unsigned FIOperandNum,
@@ -669,7 +710,8 @@
     int Offset = FIOffset + Imm;
     assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
            "Requesting 64-bit offset in 32-bit immediate!");
-    MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
+    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
+      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
   } else {
     // Offset is symbolic. This is extremely rare.
     uint64_t Offset = FIOffset +
Index: test/CodeGen/X86/avx-intel-ocl.ll =================================================================== --- test/CodeGen/X86/avx-intel-ocl.ll +++ test/CodeGen/X86/avx-intel-ocl.ll @@ -25,7 +25,7 @@ ; X64-LABEL: testf16_inp ; X64: vaddps {{.*}}, {{%ymm[0-1]}} ; X64: vaddps {{.*}}, {{%ymm[0-1]}} -; X64: leaq {{.*}}(%rsp), %rdi +; X64: movq %rsp, %rdi ; X64: call ; X64: ret Index: test/CodeGen/X86/avx512-intel-ocl.ll =================================================================== --- test/CodeGen/X86/avx512-intel-ocl.ll +++ test/CodeGen/X86/avx512-intel-ocl.ll @@ -22,7 +22,7 @@ ; X64-LABEL: testf16_inp ; X64: vaddps {{.*}}, {{%zmm[0-1]}} -; X64: leaq {{.*}}(%rsp), %rdi +; X64: movq %rsp, %rdi ; X64: call ; X64: ret Index: test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll =================================================================== --- test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll +++ test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll @@ -6,7 +6,7 @@ ; ; CHECK: callq _Z3fooPcjPKc ; CHECK: callq _Z3fooPcjPKc -; CHECK: leaq (%rsp), %rdi +; CHECK: movq %rsp, 
%rdi ; CHECK: movl $4, %esi ; CHECK: testl {{%[a-z]+}}, {{%[a-z]+}} ; CHECK: je .LBB0_4 Index: test/CodeGen/X86/dynamic-allocas-VLAs.ll =================================================================== --- test/CodeGen/X86/dynamic-allocas-VLAs.ll +++ test/CodeGen/X86/dynamic-allocas-VLAs.ll @@ -38,7 +38,7 @@ ; CHECK: subq ${{[0-9]+}}, %rsp ; ; CHECK: leaq {{[0-9]*}}(%rsp), %rdi -; CHECK: leaq {{[0-9]*}}(%rsp), %rsi +; CHECK: movq %rsp, %rsi ; CHECK: callq _t2_helper ; ; CHECK: movq %rbp, %rsp @@ -89,7 +89,7 @@ ; CHECK: movq %rsp, %rbx ; ; CHECK: leaq {{[0-9]*}}(%rbx), %rdi -; CHECK: leaq {{[0-9]*}}(%rbx), %rdx +; CHECK: movq %rbx, %rdx ; CHECK: callq _t4_helper ; ; CHECK: leaq -{{[0-9]+}}(%rbp), %rsp Index: test/CodeGen/X86/extractelement-index.ll =================================================================== --- test/CodeGen/X86/extractelement-index.ll +++ test/CodeGen/X86/extractelement-index.ll @@ -414,7 +414,7 @@ ; SSE-NEXT: subq $64, %rsp ; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSE-NEXT: movaps %xmm0, (%rsp) -; SSE-NEXT: leaq (%rsp), %rax +; SSE-NEXT: movq %rsp, %rax ; SSE-NEXT: movb (%rdi,%rax), %al ; SSE-NEXT: movq %rbp, %rsp ; SSE-NEXT: popq %rbp @@ -427,7 +427,7 @@ ; AVX-NEXT: andq $-32, %rsp ; AVX-NEXT: subq $64, %rsp ; AVX-NEXT: vmovaps %ymm0, (%rsp) -; AVX-NEXT: leaq (%rsp), %rax +; AVX-NEXT: movq %rsp, %rax ; AVX-NEXT: movb (%rdi,%rax), %al ; AVX-NEXT: movq %rbp, %rsp ; AVX-NEXT: popq %rbp Index: test/CodeGen/X86/fast-isel-x86-64.ll =================================================================== --- test/CodeGen/X86/fast-isel-x86-64.ll +++ test/CodeGen/X86/fast-isel-x86-64.ll @@ -254,7 +254,7 @@ call void @test20sret(%struct.a* sret %tmp) ret void ; CHECK-LABEL: test20: -; CHECK: leaq (%rsp), %rdi +; CHECK: movq %rsp, %rdi ; CHECK: callq _test20sret } declare void @test20sret(%struct.a* sret) Index: test/CodeGen/X86/fast-isel-x86.ll =================================================================== --- test/CodeGen/X86/fast-isel-x86.ll 
+++ test/CodeGen/X86/fast-isel-x86.ll @@ -83,7 +83,7 @@ ret void ; CHECK-LABEL: test4: ; CHECK: subl $28 -; CHECK: leal (%esp), %ecx +; CHECK: movl %esp, %ecx ; CHECK: calll _test4fastccsret ; CHECK: addl $28 } Index: test/CodeGen/X86/frameaddr.ll =================================================================== --- test/CodeGen/X86/frameaddr.ll +++ test/CodeGen/X86/frameaddr.ll @@ -19,7 +19,7 @@ ; CHECK-W64-LABEL: test1 ; CHECK-W64: push ; CHECK-W64-NEXT: movq %rsp, %rbp -; CHECK-W64-NEXT: leaq (%rbp), %rax +; CHECK-W64-NEXT: movq %rbp, %rax ; CHECK-W64-NEXT: pop ; CHECK-W64-NEXT: ret ; CHECK-64-LABEL: test1 @@ -54,7 +54,7 @@ ; CHECK-W64-LABEL: test2 ; CHECK-W64: push ; CHECK-W64-NEXT: movq %rsp, %rbp -; CHECK-W64-NEXT: leaq (%rbp), %rax +; CHECK-W64-NEXT: movq %rbp, %rax ; CHECK-W64-NEXT: pop ; CHECK-W64-NEXT: ret ; CHECK-64-LABEL: test2 Index: test/CodeGen/X86/lea-opt-memop-check-1.ll =================================================================== --- test/CodeGen/X86/lea-opt-memop-check-1.ll +++ test/CodeGen/X86/lea-opt-memop-check-1.ll @@ -21,7 +21,7 @@ %call1 = tail call <4 x float> @_mm_castsi128_ps(<2 x i64> %tmp0) ret void ; CHECK-LABEL: test1: -; CHECK: leal{{.*}} +; CHECK: movl %esp, ; CHECK: calll _memcpy ; CHECK: movaps __xmm@{{[0-9a-f]+}}, %xmm1 ; CHECK: calll __mm_xor_si128 Index: test/CodeGen/X86/local_stack_symbol_ordering.ll =================================================================== --- test/CodeGen/X86/local_stack_symbol_ordering.ll +++ test/CodeGen/X86/local_stack_symbol_ordering.ll @@ -67,7 +67,7 @@ ; X64: callq check_a ; X64: callq bar1 ; X64: callq bar1 -; X64: leaq (%rsp), %rdi +; X64: movq %rsp, %rdi ; X64: callq check_f ; X64: callq bar1 ; X64: callq bar3 Index: test/CodeGen/X86/pr29022.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/pr29022.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mcpu=skx -mtriple x86_64-unknown-linux-gnu -verify-machineinstrs | FileCheck %s +; RUN: 
llc < %s -mcpu=skx -mtriple=x86_64-linux-gnux32 -verify-machineinstrs | FileCheck %s --check-prefix=X32 + +define i32 @A() { +; CHECK: movq %rsp, %rdi +; CHECK-NEXT: call + +; X32: movl %esp, %edi +; X32-NEXT: call + %alloc = alloca i32, align 8 + %call = call i32 @foo(i32* %alloc) + ret i32 %call +} + +declare i32 @foo(i32*) Index: test/CodeGen/X86/sse-intel-ocl.ll =================================================================== --- test/CodeGen/X86/sse-intel-ocl.ll +++ test/CodeGen/X86/sse-intel-ocl.ll @@ -27,7 +27,7 @@ ; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}} ; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}} ; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}} -; NOT_WIN: leaq {{.*}}(%rsp), %rdi +; NOT_WIN: movq %rsp, %rdi ; NOT_WIN: call ; NOT_WIN: ret Index: test/CodeGen/X86/sse-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/sse-intrinsics-fast-isel.ll +++ test/CodeGen/X86/sse-intrinsics-fast-isel.ll @@ -813,7 +813,7 @@ ; X32-LABEL: test_MM_GET_EXCEPTION_MASK: ; X32: # BB#0: ; X32-NEXT: pushl %eax -; X32-NEXT: leal (%esp), %eax +; X32-NEXT: movl %esp, %eax ; X32-NEXT: stmxcsr (%eax) ; X32-NEXT: movl (%esp), %eax ; X32-NEXT: andl $8064, %eax # imm = 0x1F80 @@ -840,7 +840,7 @@ ; X32-LABEL: test_MM_GET_EXCEPTION_STATE: ; X32: # BB#0: ; X32-NEXT: pushl %eax -; X32-NEXT: leal (%esp), %eax +; X32-NEXT: movl %esp, %eax ; X32-NEXT: stmxcsr (%eax) ; X32-NEXT: movl (%esp), %eax ; X32-NEXT: andl $63, %eax @@ -866,7 +866,7 @@ ; X32-LABEL: test_MM_GET_FLUSH_ZERO_MODE: ; X32: # BB#0: ; X32-NEXT: pushl %eax -; X32-NEXT: leal (%esp), %eax +; X32-NEXT: movl %esp, %eax ; X32-NEXT: stmxcsr (%eax) ; X32-NEXT: movl (%esp), %eax ; X32-NEXT: andl $32768, %eax # imm = 0x8000 @@ -892,7 +892,7 @@ ; X32-LABEL: test_MM_GET_ROUNDING_MODE: ; X32: # BB#0: ; X32-NEXT: pushl %eax -; X32-NEXT: leal (%esp), %eax +; X32-NEXT: movl %esp, %eax ; X32-NEXT: stmxcsr (%eax) ; X32-NEXT: movl (%esp), %eax ; X32-NEXT: andl $24576, %eax # imm = 0x6000 @@ -918,7 
+918,7 @@ ; X32-LABEL: test_mm_getcsr: ; X32: # BB#0: ; X32-NEXT: pushl %eax -; X32-NEXT: leal (%esp), %eax +; X32-NEXT: movl %esp, %eax ; X32-NEXT: stmxcsr (%eax) ; X32-NEXT: movl (%esp), %eax ; X32-NEXT: popl %ecx @@ -1427,7 +1427,7 @@ ; X32: # BB#0: ; X32-NEXT: pushl %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: leal (%esp), %ecx +; X32-NEXT: movl %esp, %ecx ; X32-NEXT: stmxcsr (%ecx) ; X32-NEXT: movl (%esp), %edx ; X32-NEXT: andl $-8065, %edx # imm = 0xE07F @@ -1464,7 +1464,7 @@ ; X32: # BB#0: ; X32-NEXT: pushl %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: leal (%esp), %ecx +; X32-NEXT: movl %esp, %ecx ; X32-NEXT: stmxcsr (%ecx) ; X32-NEXT: movl (%esp), %edx ; X32-NEXT: andl $-64, %edx @@ -1500,7 +1500,7 @@ ; X32: # BB#0: ; X32-NEXT: pushl %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: leal (%esp), %ecx +; X32-NEXT: movl %esp, %ecx ; X32-NEXT: stmxcsr (%ecx) ; X32-NEXT: movl (%esp), %edx ; X32-NEXT: andl $-32769, %edx # imm = 0xFFFF7FFF @@ -1580,7 +1580,7 @@ ; X32: # BB#0: ; X32-NEXT: pushl %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: leal (%esp), %ecx +; X32-NEXT: movl %esp, %ecx ; X32-NEXT: stmxcsr (%ecx) ; X32-NEXT: movl (%esp), %edx ; X32-NEXT: andl $-24577, %edx # imm = 0x9FFF @@ -1655,7 +1655,7 @@ ; X32: # BB#0: ; X32-NEXT: pushl %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: leal (%esp), %ecx +; X32-NEXT: movl %esp, %ecx ; X32-NEXT: movl %eax, (%esp) ; X32-NEXT: ldmxcsr (%ecx) ; X32-NEXT: popl %eax Index: test/CodeGen/X86/swift-return.ll =================================================================== --- test/CodeGen/X86/swift-return.ll +++ test/CodeGen/X86/swift-return.ll @@ -38,7 +38,7 @@ ; in memroy. The caller provides space for the return value and passes ; the address in %rax. The first input argument will be in %rdi. 
; CHECK-LABEL: test2: -; CHECK: leaq (%rsp), %rax +; CHECK: movq %rsp, %rax ; CHECK: callq gen2 ; CHECK: movl (%rsp) ; CHECK-DAG: addl 4(%rsp) @@ -46,7 +46,7 @@ ; CHECK-DAG: addl 12(%rsp) ; CHECK-DAG: addl 16(%rsp) ; CHECK-O0-LABEL: test2: -; CHECK-O0-DAG: leaq (%rsp), %rax +; CHECK-O0-DAG: movq %rsp, %rax ; CHECK-O0: callq gen2 ; CHECK-O0-DAG: movl (%rsp) ; CHECK-O0-DAG: movl 4(%rsp) Index: test/CodeGen/X86/win32_sret.ll =================================================================== --- test/CodeGen/X86/win32_sret.ll +++ test/CodeGen/X86/win32_sret.ll @@ -138,7 +138,7 @@ ; The this pointer goes to ECX. ; (through %ecx in the -O0 build). ; WIN32: leal {{[0-9]*}}(%esp), %e{{[a-d]}}x -; WIN32: leal {{[0-9]*}}(%esp), %ecx +; WIN32: {{leal [1-9]+\(%esp\)|movl %esp}}, %ecx ; WIN32: {{pushl %e[a-d]x|movl %e[a-d]x, \(%esp\)}} ; WIN32-NEXT: calll "?foo@C5@@QAE?AUS5@@XZ" ; WIN32: retl @@ -158,16 +158,16 @@ ; The sret pointer is (%esp) -; WIN32: leal {{4?}}(%esp), %eax +; WIN32: {{leal 4\(%esp\)|movl %esp}}, %eax ; WIN32-NEXT: {{pushl %eax|movl %eax, \(%esp\)}} ; The sret pointer is %ecx ; The %x argument is moved to (%esp). It will be the this pointer. 
-; MINGW_X86: leal {{4?}}(%esp), %ecx +; MINGW_X86: {{leal 4\(%esp\)|movl %esp}}, %ecx ; MINGW_X86-NEXT: {{pushl 16\(%esp\)|movl %eax, \(%esp\)}} ; MINGW_X86-NEXT: calll _test6_g -; CYGWIN: leal {{4?}}(%esp), %ecx +; CYGWIN: {{leal 4\(%esp\)|movl %esp}}, %ecx ; CYGWIN-NEXT: {{pushl 16\(%esp\)|movl %eax, \(%esp\)}} ; CYGWIN-NEXT: calll _test6_g @@ -191,11 +191,11 @@ ; CYGWIN: movl {{16|20}}(%esp), %ecx ; The sret pointer is (%esp) -; WIN32: leal {{4?}}(%esp), %eax +; WIN32: {{leal 4\(%esp\)|movl %esp}}, %eax ; WIN32-NEXT: {{pushl %eax|movl %eax, \(%esp\)}} -; MINGW_X86: leal {{4?}}(%esp), %eax +; MINGW_X86: {{leal 4\(%esp\)|movl %esp}}, %eax ; MINGW_X86-NEXT: {{pushl %eax|movl %eax, \(%esp\)}} -; CYGWIN: leal {{4?}}(%esp), %eax +; CYGWIN: {{leal 4\(%esp\)|movl %esp}}, %eax ; CYGWIN-NEXT: {{pushl %eax|movl %eax, \(%esp\)}} %tmp = alloca %struct.test7, align 4 Index: test/CodeGen/X86/win64_frame.ll =================================================================== --- test/CodeGen/X86/win64_frame.ll +++ test/CodeGen/X86/win64_frame.ll @@ -110,7 +110,7 @@ %gep = getelementptr [300 x i8], [300 x i8]* %alloca, i32 0, i32 0 call void @external(i8* %gep) ; CHECK: subq $32, %rsp - ; CHECK: leaq (%rbx), %rcx + ; CHECK: movq %rbx, %rcx ; CHECK: callq external ; CHECK: addq $32, %rsp Index: test/DebugInfo/COFF/inlining.ll =================================================================== --- test/DebugInfo/COFF/inlining.ll +++ test/DebugInfo/COFF/inlining.ll @@ -170,7 +170,7 @@ ; OBJ: PtrParent: 0x0 ; OBJ: PtrEnd: 0x0 ; OBJ: PtrNext: 0x0 -; OBJ: CodeSize: 0x3D +; OBJ: CodeSize: 0x3C ; OBJ: DbgStart: 0x0 ; OBJ: DbgEnd: 0x0 ; OBJ: FunctionType: baz (0x1004) @@ -189,7 +189,7 @@ ; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x8, LineOffset: 1} ; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x7, LineOffset: 1} ; OBJ-NEXT: ChangeLineOffset: 1 -; OBJ-NEXT: ChangeCodeOffset: 0x1E +; OBJ-NEXT: ChangeCodeOffset: 0x1D ; OBJ-NEXT: ChangeCodeLength: 0x7 ; OBJ: ] ; OBJ: 
} @@ -199,7 +199,7 @@ ; OBJ: Inlinee: foo (0x1003) ; OBJ: BinaryAnnotations [ ; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0xF, LineOffset: 1} -; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0xA, LineOffset: 1} +; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x9, LineOffset: 1} ; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x6, LineOffset: 1} ; OBJ-NEXT: ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x7, LineOffset: 1} ; OBJ-NEXT: ChangeCodeLength: 0x7