diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -359,7 +359,16 @@
  // If this block loops back to itself, it is necessary to check whether the
  // use comes after the def.
  if (MBB->isSuccessor(MBB)) {
-    SelfLoopDef = MRI->getUniqueVRegDef(VirtReg);
+    // Find the first def in the self loop MBB.
+    for (const MachineInstr &DefInst : MRI->def_instructions(VirtReg)) {
+      if (DefInst.getParent() != MBB) {
+        MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
+        return true;
+      } else {
+        if (!SelfLoopDef || dominates(*MBB, DefInst.getIterator(), SelfLoopDef))
+          SelfLoopDef = &DefInst;
+      }
+    }
    if (!SelfLoopDef) {
      MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
      return true;
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll b/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll
--- a/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll
@@ -258,19 +258,19 @@
;
; LSE-LABEL: test_rmw_add_128:
; LSE: // %bb.0: // %entry
-; LSE-NEXT: sub sp, sp, #80
-; LSE-NEXT: .cfi_def_cfa_offset 80
-; LSE-NEXT: str x0, [sp, #56] // 8-byte Folded Spill
+; LSE-NEXT: sub sp, sp, #48
+; LSE-NEXT: .cfi_def_cfa_offset 48
+; LSE-NEXT: str x0, [sp, #24] // 8-byte Folded Spill
; LSE-NEXT: ldr x8, [x0, #8]
; LSE-NEXT: ldr x9, [x0]
-; LSE-NEXT: str x9, [sp, #64] // 8-byte Folded Spill
-; LSE-NEXT: str x8, [sp, #72] // 8-byte Folded Spill
+; LSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill
+; LSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill
; LSE-NEXT: b .LBB4_1
; LSE-NEXT: .LBB4_1: // %atomicrmw.start
; LSE-NEXT: // =>This Inner Loop Header: Depth=1
-; LSE-NEXT: ldr x10, [sp, #72] // 8-byte Folded Reload
-; LSE-NEXT: ldr x8, [sp, #64] // 8-byte Folded Reload
-; LSE-NEXT: ldr x9, [sp, #56] // 8-byte Folded Reload
+; LSE-NEXT: ldr x10, [sp, #40] // 8-byte Folded Reload
+; LSE-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload
+; LSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload
; LSE-NEXT: adds x2, x8, #1
; LSE-NEXT: mov x11, xzr
; LSE-NEXT: adcs x11, x10, x11
@@ -278,24 +278,22 @@
; LSE-NEXT: mov x3, x11
; LSE-NEXT: mov x0, x8
; LSE-NEXT: mov x1, x10
-; LSE-NEXT: stp x0, x1, [sp, #8] // 16-byte Folded Spill
; LSE-NEXT: caspal x0, x1, x2, x3, [x9]
-; LSE-NEXT: stp x0, x1, [sp, #24] // 16-byte Folded Spill
; LSE-NEXT: mov x9, x1
-; LSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill
+; LSE-NEXT: str x9, [sp, #8] // 8-byte Folded Spill
; LSE-NEXT: eor x11, x9, x10
; LSE-NEXT: mov x10, x0
-; LSE-NEXT: str x10, [sp, #48] // 8-byte Folded Spill
+; LSE-NEXT: str x10, [sp, #16] // 8-byte Folded Spill
; LSE-NEXT: eor x8, x10, x8
; LSE-NEXT: orr x8, x8, x11
-; LSE-NEXT: str x10, [sp, #64] // 8-byte Folded Spill
-; LSE-NEXT: str x9, [sp, #72] // 8-byte Folded Spill
+; LSE-NEXT: str x10, [sp, #32] // 8-byte Folded Spill
+; LSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill
; LSE-NEXT: cbnz x8, .LBB4_1
; LSE-NEXT: b .LBB4_2
; LSE-NEXT: .LBB4_2: // %atomicrmw.end
-; LSE-NEXT: ldr x1, [sp, #40] // 8-byte Folded Reload
-; LSE-NEXT: ldr x0, [sp, #48] // 8-byte Folded Reload
-; LSE-NEXT: add sp, sp, #80
+; LSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload
+; LSE-NEXT: ldr x0, [sp, #16] // 8-byte Folded Reload
+; LSE-NEXT: add sp, sp, #48
; LSE-NEXT: ret
entry:
  %res = atomicrmw add i128* %dst, i128 1 seq_cst
@@ -357,7 +355,6 @@
; LSE-NEXT: orr w10, w8, #0xfffffffe
; LSE-NEXT: mov w8, w9
; LSE-NEXT: casalb w8, w10, [x11]
-; LSE-NEXT: str w8, [sp, #8] // 4-byte Folded Spill
; LSE-NEXT: subs w9, w8, w9, uxtb
; LSE-NEXT: cset w9, eq
; LSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill
@@ -430,7 +427,6 @@
; LSE-NEXT: orr w10, w8, #0xfffffffe
; LSE-NEXT: mov w8, w9
; LSE-NEXT: casalh w8, w10, [x11]
-; LSE-NEXT: str w8, [sp, #8] // 4-byte Folded Spill
; LSE-NEXT: subs w9, w8, w9, uxth
; LSE-NEXT: cset w9, eq
; LSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill
@@ -503,7 +499,6 @@
; LSE-NEXT: orr w10, w8, #0xfffffffe
; LSE-NEXT: mov w8, w9
; LSE-NEXT: casal w8, w10, [x11]
-; LSE-NEXT: str w8, [sp, #8] // 4-byte Folded Spill
; LSE-NEXT: subs w9, w8, w9
; LSE-NEXT: cset w9, eq
; LSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill
@@ -582,7 +577,6 @@
; LSE-NEXT: orr x10, x8, #0xfffffffffffffffe
; LSE-NEXT: mov x8, x9
; LSE-NEXT: casal x8, x10, [x11]
-; LSE-NEXT: str x8, [sp] // 8-byte Folded Spill
; LSE-NEXT: subs x9, x8, x9
; LSE-NEXT: cset w9, eq
; LSE-NEXT: str x8, [sp, #8] // 8-byte Folded Spill
@@ -659,22 +653,21 @@
;
; LSE-LABEL: test_rmw_nand_128:
; LSE: // %bb.0: // %entry
-; LSE-NEXT: sub sp, sp, #80
-; LSE-NEXT: .cfi_def_cfa_offset 80
-; LSE-NEXT: str x0, [sp, #56] // 8-byte Folded Spill
+; LSE-NEXT: sub sp, sp, #48
+; LSE-NEXT: .cfi_def_cfa_offset 48
+; LSE-NEXT: str x0, [sp, #24] // 8-byte Folded Spill
; LSE-NEXT: ldr x8, [x0, #8]
; LSE-NEXT: ldr x9, [x0]
-; LSE-NEXT: str x9, [sp, #64] // 8-byte Folded Spill
-; LSE-NEXT: str x8, [sp, #72] // 8-byte Folded Spill
+; LSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill
+; LSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill
; LSE-NEXT: b .LBB9_1
; LSE-NEXT: .LBB9_1: // %atomicrmw.start
; LSE-NEXT: // =>This Inner Loop Header: Depth=1
-; LSE-NEXT: ldr x10, [sp, #72] // 8-byte Folded Reload
-; LSE-NEXT: ldr x8, [sp, #64] // 8-byte Folded Reload
-; LSE-NEXT: ldr x9, [sp, #56] // 8-byte Folded Reload
+; LSE-NEXT: ldr x10, [sp, #40] // 8-byte Folded Reload
+; LSE-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload
+; LSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload
; LSE-NEXT: mov x0, x8
; LSE-NEXT: mov x1, x10
-; LSE-NEXT: stp x0, x1, [sp, #8] // 16-byte Folded Spill
; LSE-NEXT: mov w11, w8
; LSE-NEXT: mvn w12, w11
; LSE-NEXT: // implicit-def: $x11
@@ -684,22 +677,21 @@
; LSE-NEXT: // kill: def $x2 killed $x2 def $x2_x3
; LSE-NEXT: mov x3, x11
; LSE-NEXT: caspal x0, x1, x2, x3, [x9]
-; LSE-NEXT: stp x0, x1, [sp, #24] // 16-byte Folded Spill
; LSE-NEXT: mov x9, x1
-; LSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill
+; LSE-NEXT: str x9, [sp, #8] // 8-byte Folded Spill
; LSE-NEXT: eor x11, x9, x10
; LSE-NEXT: mov x10, x0
-; LSE-NEXT: str x10, [sp, #48] // 8-byte Folded Spill
+; LSE-NEXT: str x10, [sp, #16] // 8-byte Folded Spill
; LSE-NEXT: eor x8, x10, x8
; LSE-NEXT: orr x8, x8, x11
-; LSE-NEXT: str x10, [sp, #64] // 8-byte Folded Spill
-; LSE-NEXT: str x9, [sp, #72] // 8-byte Folded Spill
+; LSE-NEXT: str x10, [sp, #32] // 8-byte Folded Spill
+; LSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill
; LSE-NEXT: cbnz x8, .LBB9_1
; LSE-NEXT: b .LBB9_2
; LSE-NEXT: .LBB9_2: // %atomicrmw.end
-; LSE-NEXT: ldr x1, [sp, #40] // 8-byte Folded Reload
-; LSE-NEXT: ldr x0, [sp, #48] // 8-byte Folded Reload
-; LSE-NEXT: add sp, sp, #80
+; LSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload
+; LSE-NEXT: ldr x0, [sp, #16] // 8-byte Folded Reload
+; LSE-NEXT: add sp, sp, #48
; LSE-NEXT: ret
entry:
  %res = atomicrmw nand i128* %dst, i128 1 seq_cst
diff --git a/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir b/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir
--- a/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir
+++ b/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir
@@ -55,11 +55,11 @@
; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, implicit $exec
; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec
- ; GCN: SI_SPILL_V32_SAVE $vgpr2, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: bb.2:
; GCN: S_ENDPGM 0
+
bb.0:
liveins: $vgpr0_vgpr1
%0:vreg_64 = COPY $vgpr0_vgpr1
diff --git a/llvm/test/CodeGen/X86/atomic32.ll b/llvm/test/CodeGen/X86/atomic32.ll
--- a/llvm/test/CodeGen/X86/atomic32.ll
+++ b/llvm/test/CodeGen/X86/atomic32.ll
@@ -226,7 +226,6 @@
; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl %edx, %ecx
-; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: notl %ecx
; X64-NEXT: lock cmpxchgl %ecx, sc32(%rip)
; X64-NEXT: sete %cl
@@ -239,18 +238,17 @@
;
; X86-LABEL: atomic_fetch_nand32:
; X86: # %bb.0:
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: subl $8, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl sc32, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: .LBB5_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl %edx, %ecx
-; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT: notl %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32
; X86-NEXT: sete %cl
@@ -259,7 +257,7 @@
; X86-NEXT: jne .LBB5_2
; X86-NEXT: jmp .LBB5_1
; X86-NEXT: .LBB5_2: # %atomicrmw.end
-; X86-NEXT: addl $12, %esp
+; X86-NEXT: addl $8, %esp
; X86-NEXT: retl
  %t1 = atomicrmw nand i32* @sc32, i32 %x acquire
  ret void
@@ -277,7 +275,6 @@
; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; X64-NEXT: movl %eax, %edx
; X64-NEXT: subl %ecx, %edx
-; X64-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: cmovgl %eax, %ecx
; X64-NEXT: lock cmpxchgl %ecx, sc32(%rip)
; X64-NEXT: sete %cl
@@ -290,18 +287,17 @@
;
; X86-CMOV-LABEL: atomic_fetch_max32:
; X86-CMOV: # %bb.0:
-; X86-CMOV-NEXT: subl $12, %esp
+; X86-CMOV-NEXT: subl $8, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: movl sc32, %eax
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: .LBB6_1: # %atomicrmw.start
; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1
; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-CMOV-NEXT: movl (%esp), %ecx # 4-byte Reload
; X86-CMOV-NEXT: movl %eax, %edx
; X86-CMOV-NEXT: subl %ecx, %edx
-; X86-CMOV-NEXT: movl %edx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: cmovgl %eax, %ecx
; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32
; X86-CMOV-NEXT: sete %cl
@@ -310,7 +306,7 @@
; X86-CMOV-NEXT: jne .LBB6_2
; X86-CMOV-NEXT: jmp .LBB6_1
; X86-CMOV-NEXT: .LBB6_2: # %atomicrmw.end
-; X86-CMOV-NEXT: addl $12, %esp
+; X86-CMOV-NEXT: addl $8, %esp
; X86-CMOV-NEXT: retl
;
; X86-NOCMOV-LABEL: atomic_fetch_max32:
@@ -396,7 +392,6 @@
; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; X64-NEXT: movl %eax, %edx
; X64-NEXT: subl %ecx, %edx
-; X64-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: cmovlel %eax, %ecx
; X64-NEXT: lock cmpxchgl %ecx, sc32(%rip)
; X64-NEXT: sete %cl
@@ -409,18 +404,17 @@
;
; X86-CMOV-LABEL: atomic_fetch_min32:
; X86-CMOV: # %bb.0:
-; X86-CMOV-NEXT: subl $12, %esp
+; X86-CMOV-NEXT: subl $8, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: movl sc32, %eax
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: .LBB7_1: # %atomicrmw.start
; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1
; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-CMOV-NEXT: movl (%esp), %ecx # 4-byte Reload
; X86-CMOV-NEXT: movl %eax, %edx
; X86-CMOV-NEXT: subl %ecx, %edx
-; X86-CMOV-NEXT: movl %edx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: cmovlel %eax, %ecx
; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32
; X86-CMOV-NEXT: sete %cl
@@ -429,7 +423,7 @@
; X86-CMOV-NEXT: jne .LBB7_2
; X86-CMOV-NEXT: jmp .LBB7_1
; X86-CMOV-NEXT: .LBB7_2: # %atomicrmw.end
-; X86-CMOV-NEXT: addl $12, %esp
+; X86-CMOV-NEXT: addl $8, %esp
; X86-CMOV-NEXT: retl
;
; X86-NOCMOV-LABEL: atomic_fetch_min32:
@@ -515,7 +509,6 @@
; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; X64-NEXT: movl %eax, %edx
; X64-NEXT: subl %ecx, %edx
-; X64-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: cmoval %eax, %ecx
; X64-NEXT: lock cmpxchgl %ecx, sc32(%rip)
; X64-NEXT: sete %cl
@@ -528,18 +521,17 @@
;
; X86-CMOV-LABEL: atomic_fetch_umax32:
; X86-CMOV: # %bb.0:
-; X86-CMOV-NEXT: subl $12, %esp
+; X86-CMOV-NEXT: subl $8, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: movl sc32, %eax
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: .LBB8_1: # %atomicrmw.start
; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1
; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-CMOV-NEXT: movl (%esp), %ecx # 4-byte Reload
; X86-CMOV-NEXT: movl %eax, %edx
; X86-CMOV-NEXT: subl %ecx, %edx
-; X86-CMOV-NEXT: movl %edx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: cmoval %eax, %ecx
; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32
; X86-CMOV-NEXT: sete %cl
@@ -548,7 +540,7 @@
; X86-CMOV-NEXT: jne .LBB8_2
; X86-CMOV-NEXT: jmp .LBB8_1
; X86-CMOV-NEXT: .LBB8_2: # %atomicrmw.end
-; X86-CMOV-NEXT: addl $12, %esp
+; X86-CMOV-NEXT: addl $8, %esp
; X86-CMOV-NEXT: retl
;
; X86-NOCMOV-LABEL: atomic_fetch_umax32:
@@ -634,7 +626,6 @@
; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; X64-NEXT: movl %eax, %edx
; X64-NEXT: subl %ecx, %edx
-; X64-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: cmovbel %eax, %ecx
; X64-NEXT: lock cmpxchgl %ecx, sc32(%rip)
; X64-NEXT: sete %cl
@@ -647,18 +638,17 @@
;
; X86-CMOV-LABEL: atomic_fetch_umin32:
; X86-CMOV: # %bb.0:
-; X86-CMOV-NEXT: subl $12, %esp
+; X86-CMOV-NEXT: subl $8, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: movl sc32, %eax
; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: .LBB9_1: # %atomicrmw.start
; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1
; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-CMOV-NEXT: movl (%esp), %ecx # 4-byte Reload
; X86-CMOV-NEXT: movl %eax, %edx
; X86-CMOV-NEXT: subl %ecx, %edx
-; X86-CMOV-NEXT: movl %edx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: cmovbel %eax, %ecx
; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32
; X86-CMOV-NEXT: sete %cl
@@ -667,7 +657,7 @@
; X86-CMOV-NEXT: jne .LBB9_2
; X86-CMOV-NEXT: jmp .LBB9_1
; X86-CMOV-NEXT: .LBB9_2: # %atomicrmw.end
-; X86-CMOV-NEXT: addl $12, %esp
+; X86-CMOV-NEXT: addl $8, %esp
; X86-CMOV-NEXT: retl
;
; X86-NOCMOV-LABEL: atomic_fetch_umin32:
diff --git a/llvm/test/CodeGen/X86/atomic64.ll b/llvm/test/CodeGen/X86/atomic64.ll
--- a/llvm/test/CodeGen/X86/atomic64.ll
+++ b/llvm/test/CodeGen/X86/atomic64.ll
@@ -272,7 +272,6 @@
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: andq %rdx, %rcx
-; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: notq %rcx
; X64-NEXT: lock cmpxchgq %rcx, sc64(%rip)
; X64-NEXT: sete %cl
@@ -312,7 +311,6 @@
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; X64-NEXT: movq %rax, %rdx
; X64-NEXT: subq %rcx, %rdx
-; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: cmovgq %rax, %rcx
; X64-NEXT: lock cmpxchgq %rcx, sc64(%rip)
; X64-NEXT: sete %cl
@@ -405,7 +403,6 @@
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; X64-NEXT: movq %rax, %rdx
; X64-NEXT: subq %rcx, %rdx
-; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: cmovleq %rax, %rcx
; X64-NEXT: lock cmpxchgq %rcx, sc64(%rip)
; X64-NEXT: sete %cl
@@ -498,7 +495,6 @@
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; X64-NEXT: movq %rax, %rdx
; X64-NEXT: subq %rcx, %rdx
-; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: cmovaq %rax, %rcx
; X64-NEXT: lock cmpxchgq %rcx, sc64(%rip)
; X64-NEXT: sete %cl
@@ -591,7 +587,6 @@
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; X64-NEXT: movq %rax, %rdx
; X64-NEXT: subq %rcx, %rdx
-; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: cmovbeq %rax, %rcx
; X64-NEXT: lock cmpxchgq %rcx, sc64(%rip)
; X64-NEXT: sete %cl
diff --git a/llvm/test/CodeGen/X86/atomic6432.ll b/llvm/test/CodeGen/X86/atomic6432.ll
--- a/llvm/test/CodeGen/X86/atomic6432.ll
+++ b/llvm/test/CodeGen/X86/atomic6432.ll
@@ -8,7 +8,7 @@
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $72, %esp
+; X32-NEXT: subl $40, %esp
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -20,10 +20,8 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl $1, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -41,10 +39,8 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl $3, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -62,10 +58,8 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl $5, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $0, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -76,28 +70,26 @@
; X32-NEXT: .LBB0_6: # %atomicrmw.end7
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB0_7
; X32-NEXT: .LBB0_7: # %atomicrmw.start14
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl %ecx, %ebx
-; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl %esi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB0_7
; X32-NEXT: jmp .LBB0_8
; X32-NEXT: .LBB0_8: # %atomicrmw.end13
-; X32-NEXT: addl $72, %esp
+; X32-NEXT: addl $40, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
@@ -114,7 +106,7 @@
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $72, %esp
+; X32-NEXT: subl $40, %esp
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -126,10 +118,8 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl $-1, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $-1, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -147,10 +137,8 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl $-3, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $-1, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -168,10 +156,8 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: addl $-5, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: adcl $-1, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -182,28 +168,26 @@
; X32-NEXT: .LBB1_6: # %atomicrmw.end7
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB1_7
; X32-NEXT: .LBB1_7: # %atomicrmw.start14
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: subl %ecx, %ebx
-; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: sbbl %esi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB1_7
; X32-NEXT: jmp .LBB1_8
; X32-NEXT: .LBB1_8: # %atomicrmw.end13
-; X32-NEXT: addl $72, %esp
+; X32-NEXT: addl $40, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
@@ -219,7 +203,7 @@
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $52, %esp
+; X32-NEXT: subl $32, %esp
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -231,7 +215,6 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: andl $3, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: lock cmpxchg8b sc64
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -250,10 +233,8 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: andl $1, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: andl $1, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -264,28 +245,26 @@
; X32-NEXT: .LBB2_4: # %atomicrmw.end1
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB2_5
; X32-NEXT: .LBB2_5: # %atomicrmw.start8
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: andl %ecx, %ebx
-; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: andl %esi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB2_5
; X32-NEXT: jmp .LBB2_6
; X32-NEXT: .LBB2_6: # %atomicrmw.end7
-; X32-NEXT: addl $52, %esp
+; X32-NEXT: addl $32, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
@@ -300,7 +279,7 @@
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $52, %esp
+; X32-NEXT: subl $32, %esp
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -312,7 +291,6 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: orl $3, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: lock cmpxchg8b sc64
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -331,10 +309,8 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: orl $1, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: orl $1, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -345,28 +321,26 @@
; X32-NEXT: .LBB3_4: # %atomicrmw.end1
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB3_5
; X32-NEXT: .LBB3_5: # %atomicrmw.start8
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: orl %ecx, %ebx
-; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: orl %esi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB3_5
; X32-NEXT: jmp .LBB3_6
; X32-NEXT: .LBB3_6: # %atomicrmw.end7
-; X32-NEXT: addl $52, %esp
+; X32-NEXT: addl $32, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
@@ -381,7 +355,7 @@
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $52, %esp
+; X32-NEXT: subl $32, %esp
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -393,7 +367,6 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: xorl $3, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: lock cmpxchg8b sc64
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -412,10 +385,8 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: xorl $1, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: xorl $1, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -426,28 +397,26 @@
; X32-NEXT: .LBB4_4: # %atomicrmw.end1
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB4_5
; X32-NEXT: .LBB4_5: # %atomicrmw.start8
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NEXT: movl (%esp), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: xorl %ecx, %ebx
-; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: xorl %esi, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB4_5
; X32-NEXT: jmp .LBB4_6
; X32-NEXT: .LBB4_6: # %atomicrmw.end7
-; X32-NEXT: addl $52, %esp
+; X32-NEXT: addl $32, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
@@ -463,9 +432,9 @@
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $32, %esp
+; X32-NEXT: subl $16, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl sc64+4, %edx
@@ -478,24 +447,20 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NEXT: movl (%esp), %edi # 4-byte Reload
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: andl %edi, %ecx
-; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: andl %esi, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: notl %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: notl %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB5_1
; X32-NEXT: jmp .LBB5_2
; X32-NEXT: .LBB5_2: # %atomicrmw.end
-; X32-NEXT: addl $32, %esp
+; X32-NEXT: addl $16, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
@@ -509,9 +474,9 @@
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $32, %esp
+; X32-NEXT: subl $16, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl sc64+4, %edx
@@ -524,24 +489,20 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
; X32-NEXT: movl %ebx, %esi
; X32-NEXT: subl %eax, %esi
-; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: movl %ecx, %esi
; X32-NEXT: sbbl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: cmovll %edx, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: cmovll %eax, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB6_1
; X32-NEXT: jmp .LBB6_2
; X32-NEXT: .LBB6_2: # %atomicrmw.end
-; X32-NEXT: addl $32, %esp
+; X32-NEXT: addl $16, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
@@ -554,9 +515,9 @@
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $32, %esp
+; X32-NEXT: subl $16, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl sc64+4, %edx
@@ -569,24 +530,20 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
; X32-NEXT: movl %ebx, %esi
; X32-NEXT: subl %eax, %esi
-; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: movl %ecx, %esi
; X32-NEXT: sbbl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: cmovgel %edx, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: cmovgel %eax, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB7_1
; X32-NEXT: jmp .LBB7_2
; X32-NEXT: .LBB7_2: # %atomicrmw.end
-; X32-NEXT: addl $32, %esp
+; X32-NEXT: addl $16, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
@@ -599,9 +556,9 @@
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $32, %esp
+; X32-NEXT: subl $16, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl sc64+4, %edx
@@ -614,24 +571,20 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
; X32-NEXT: movl %ebx, %esi
; X32-NEXT: subl %eax, %esi
-; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: movl %ecx, %esi
; X32-NEXT: sbbl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: cmovbl %edx, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: cmovbl %eax, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB8_1
; X32-NEXT: jmp .LBB8_2
; X32-NEXT: .LBB8_2: # %atomicrmw.end
-; X32-NEXT: addl $32, %esp
+; X32-NEXT: addl $16, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
@@ -644,9 +597,9 @@
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $32, %esp
+; X32-NEXT: subl $16, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl sc64+4, %edx
@@ -659,24 +612,20 @@
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
; X32-NEXT: movl %ebx, %esi
; X32-NEXT: subl %eax, %esi
-; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: movl %ecx, %esi
; X32-NEXT: sbbl %edx, %esi
-; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: cmovael %edx, %ecx
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: cmovael %eax, %ebx
-; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: lock cmpxchg8b sc64
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB9_1
; X32-NEXT: jmp .LBB9_2
; X32-NEXT: .LBB9_2: # %atomicrmw.end
-; X32-NEXT: addl $32, %esp
+; X32-NEXT: addl $16, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/fastregalloc-selfloop.mir b/llvm/test/CodeGen/X86/fastregalloc-selfloop.mir
--- a/llvm/test/CodeGen/X86/fastregalloc-selfloop.mir
+++ b/llvm/test/CodeGen/X86/fastregalloc-selfloop.mir
@@ -14,11 +14,10 @@
machineFunctionInfo: {}
body: |
  bb.0.entry:
-    ; CHECK: renamable $xmm1 = V_SET0
+    ; CHECK: renamable $xmm1 = V_SET0
    ; CHECK-NEXT: renamable $xmm0 = V_SET0
    ; CHECK-NEXT: renamable $xmm1 = PXORrr renamable $xmm1, renamable $xmm0
-    ; CHECK-NEXT: MOVAPSmr %stack.1, 1, $noreg, 0, $noreg, $xmm1 :: (store (s128) into %stack.1)
-    ; CHECK-NEXT: MOVAPSmr %stack.0, 1, $noreg, 0, $noreg, renamable $xmm1
+    ; CHECK-NEXT: MOVAPSmr %stack.0, 1, $noreg, 0, $noreg, killed renamable $xmm1
    ; CHECK-NEXT: MOVAPSmr %stack.0, 1, $noreg, 16, $noreg, killed renamable $xmm0
    %0:vr128 = V_SET0
diff --git a/llvm/test/CodeGen/X86/swifterror.ll b/llvm/test/CodeGen/X86/swifterror.ll
--- a/llvm/test/CodeGen/X86/swifterror.ll
+++ b/llvm/test/CodeGen/X86/swifterror.ll
@@ -983,7 +983,6 @@
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 ## 8-byte Reload
; CHECK-O0-NEXT: ## implicit-def: $edi
; CHECK-O0-NEXT: movw %ax, %di
-; CHECK-O0-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; CHECK-O0-NEXT: ## implicit-def: $rax
; CHECK-O0-NEXT: callq *%rax
; CHECK-O0-NEXT: ## implicit-def: $rax
diff --git a/llvm/test/DebugInfo/X86/fission-ranges.ll b/llvm/test/DebugInfo/X86/fission-ranges.ll
--- a/llvm/test/DebugInfo/X86/fission-ranges.ll
+++ b/llvm/test/DebugInfo/X86/fission-ranges.ll
@@ -51,7 +51,7 @@
; CHECK-NEXT: DW_LLE_end_of_list ()
; CHECK: [[E]]:
; CHECK-NEXT: DW_LLE_startx_length (0x00000004, 0x0000000b): DW_OP_reg0 RAX
-; CHECK-NEXT: DW_LLE_startx_length (0x00000005, 0x0000005a): DW_OP_breg7 RSP-48
+; CHECK-NEXT: DW_LLE_startx_length (0x00000005, 0x0000005a): DW_OP_breg7 RSP-36
; CHECK-NEXT: DW_LLE_end_of_list ()
; CHECK: [[B]]:
; CHECK-NEXT: DW_LLE_startx_length (0x00000006, 0x0000000b): DW_OP_reg0 RAX