Index: llvm/lib/CodeGen/RegAllocGreedy.cpp =================================================================== --- llvm/lib/CodeGen/RegAllocGreedy.cpp +++ llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -2842,28 +2842,8 @@ } void RAGreedy::initializeCSRCost() { - // We use the larger one out of the command-line option and the value report - // by TRI. - CSRCost = BlockFrequency( - std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost())); - if (!CSRCost.getFrequency()) - return; - - // Raw cost is relative to Entry == 2^14; scale it appropriately. - uint64_t ActualEntry = MBFI->getEntryFreq(); - if (!ActualEntry) { - CSRCost = 0; - return; - } - uint64_t FixedEntry = 1 << 14; - if (ActualEntry < FixedEntry) - CSRCost *= BranchProbability(ActualEntry, FixedEntry); - else if (ActualEntry <= UINT32_MAX) - // Invert the fraction and divide. - CSRCost /= BranchProbability(FixedEntry, ActualEntry); - else - // Can't use BranchProbability in general, since it takes 32-bit numbers. - CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry); + // We need to scale the cost relative to the entry frequency. + CSRCost = BlockFrequency(MBFI->getEntryFreq() * TRI->getCSRFirstUseCost()); } /// Collect the hint info for \p Reg. Index: llvm/lib/Target/AArch64/AArch64RegisterInfo.h =================================================================== --- llvm/lib/Target/AArch64/AArch64RegisterInfo.h +++ llvm/lib/Target/AArch64/AArch64RegisterInfo.h @@ -50,10 +50,9 @@ CallingConv::ID) const override; unsigned getCSRFirstUseCost() const override { - // The cost will be compared against BlockFrequency where entry has the - // value of 1 << 14. A value of 5 will choose to spill or split really - // cold path instead of using a callee-saved register. - return 5; + // If a CSR is used, the cost is a load/store pair in the prologue/epilogue. + // So the cost is one spill, like the cost calculated in SpillPlacer. 
+ return 1; } const TargetRegisterClass * Index: llvm/lib/Target/PowerPC/PPCRegisterInfo.h =================================================================== --- llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -100,6 +100,12 @@ return true; } + unsigned getCSRFirstUseCost() const override { + // If a CSR is used, the cost is a load/store pair in the prologue/epilogue. + // So the cost is one spill, like the cost calculated in SpillPlacer. + return 1; + } + void lowerDynamicAlloc(MachineBasicBlock::iterator II) const; void lowerDynamicAreaOffset(MachineBasicBlock::iterator II) const; void lowerCRSpilling(MachineBasicBlock::iterator II, Index: llvm/lib/Target/X86/X86RegisterInfo.h =================================================================== --- llvm/lib/Target/X86/X86RegisterInfo.h +++ llvm/lib/Target/X86/X86RegisterInfo.h @@ -109,6 +109,12 @@ CallingConv::ID) const override; const uint32_t *getNoPreservedMask() const override; + unsigned getCSRFirstUseCost() const override { + // If a CSR is used, the cost is a load/store pair in the prologue/epilogue. + // So the cost is one spill, like the cost calculated in SpillPlacer. + return 1; + } + // Calls involved in thread-local variable lookup save more registers than // normal calls, so they need a different mask to represent this. const uint32_t *getDarwinTLSCallPreservedMask() const; Index: llvm/test/CodeGen/AArch64/cgp-usubo.ll =================================================================== --- llvm/test/CodeGen/AArch64/cgp-usubo.ll +++ llvm/test/CodeGen/AArch64/cgp-usubo.ll @@ -151,30 +151,33 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, i64* %p, i1 %cond) nounwind { ; CHECK-LABEL: usubo_ult_cmp_dominates_i64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-48]! 
// 8-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #48 // =48 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: mov w20, w3 -; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: tbz w3, #0, .LBB8_3 ; CHECK-NEXT: // %bb.1: // %t ; CHECK-NEXT: cmp x0, x1 -; CHECK-NEXT: mov x22, x0 -; CHECK-NEXT: cset w0, lo -; CHECK-NEXT: mov x19, x2 -; CHECK-NEXT: mov x21, x1 +; CHECK-NEXT: cset w8, lo +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: str x2, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: str w3, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: mov x20, x1 ; CHECK-NEXT: bl call -; CHECK-NEXT: subs x8, x22, x21 +; CHECK-NEXT: ldr w3, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: subs x9, x19, x20 ; CHECK-NEXT: b.hs .LBB8_3 ; CHECK-NEXT: // %bb.2: // %end ; CHECK-NEXT: cset w0, lo -; CHECK-NEXT: str x8, [x19] +; CHECK-NEXT: str x9, [x8] ; CHECK-NEXT: b .LBB8_4 ; CHECK-NEXT: .LBB8_3: // %f -; CHECK-NEXT: and w0, w20, #0x1 +; CHECK-NEXT: and w0, w3, #0x1 ; CHECK-NEXT: .LBB8_4: // %f ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 // =48 ; CHECK-NEXT: ret entry: br i1 %cond, label %t, label %f Index: llvm/test/CodeGen/AArch64/csr-split.ll =================================================================== --- llvm/test/CodeGen/AArch64/csr-split.ll +++ llvm/test/CodeGen/AArch64/csr-split.ll @@ -86,7 +86,6 @@ ; CHECK-NEXT: // %bb.1: // %if.end ; CHECK-NEXT: adrp x8, a ; CHECK-NEXT: ldrsw x8, [x8, :lo12:a] -; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: cmp x8, x0 ; CHECK-NEXT: b.eq .LBB1_3 ; CHECK-NEXT: .LBB1_2: // %return @@ -94,6 +93,7 @@ ; CHECK-NEXT: ldp x30, x19, [sp], #16 
// 16-byte Folded Reload ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_3: // %if.then2 +; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: bl callVoid ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload @@ -114,7 +114,6 @@ ; CHECK-APPLE-NEXT: adrp x8, _a@PAGE ; CHECK-APPLE-NEXT: Lloh3: ; CHECK-APPLE-NEXT: ldrsw x8, [x8, _a@PAGEOFF] -; CHECK-APPLE-NEXT: mov x19, x0 ; CHECK-APPLE-NEXT: cmp x8, x0 ; CHECK-APPLE-NEXT: b.eq LBB1_3 ; CHECK-APPLE-NEXT: LBB1_2: ; %return @@ -123,6 +122,7 @@ ; CHECK-APPLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: ret ; CHECK-APPLE-NEXT: LBB1_3: ; %if.then2 +; CHECK-APPLE-NEXT: mov x19, x0 ; CHECK-APPLE-NEXT: bl _callVoid ; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: mov x0, x19 Index: llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll =================================================================== --- llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll +++ llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll @@ -141,11 +141,12 @@ ; CHECK-NEXT: stw 3, 216(1) ; CHECK-NEXT: lfd 2, 216(1) ; CHECK-NEXT: bl __gcc_qadd@PLT +; CHECK-NEXT: mcrf 0, 2 ; CHECK-NEXT: blt 2, .LBB0_7 ; CHECK-NEXT: # %bb.6: # %bb1 ; CHECK-NEXT: fmr 2, 28 ; CHECK-NEXT: .LBB0_7: # %bb1 -; CHECK-NEXT: blt 2, .LBB0_9 +; CHECK-NEXT: blt 0, .LBB0_9 ; CHECK-NEXT: # %bb.8: # %bb1 ; CHECK-NEXT: fmr 1, 29 ; CHECK-NEXT: .LBB0_9: # %bb1 Index: llvm/test/CodeGen/PowerPC/csr-split.ll =================================================================== --- llvm/test/CodeGen/PowerPC/csr-split.ll +++ llvm/test/CodeGen/PowerPC/csr-split.ll @@ -18,20 +18,21 @@ ; CHECK-PWR9-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-PWR9-NEXT: std r0, 16(r1) ; CHECK-PWR9-NEXT: stdu r1, -48(r1) -; CHECK-PWR9-NEXT: mr r30, r3 -; CHECK-PWR9-NEXT: addis r3, r2, a@toc@ha -; CHECK-PWR9-NEXT: lwa r3, a@toc@l(r3) -; CHECK-PWR9-NEXT: cmpld r3, r30 -; CHECK-PWR9-NEXT: # implicit-def: $r3 +; CHECK-PWR9-NEXT: addis r4, r2, 
a@toc@ha +; CHECK-PWR9-NEXT: lwa r4, a@toc@l(r4) +; CHECK-PWR9-NEXT: cmpld r4, r3 +; CHECK-PWR9-NEXT: # implicit-def: $r4 ; CHECK-PWR9-NEXT: bne cr0, .LBB0_2 ; CHECK-PWR9-NEXT: # %bb.1: # %if.then +; CHECK-PWR9-NEXT: mr r30, r3 ; CHECK-PWR9-NEXT: bl callVoid ; CHECK-PWR9-NEXT: nop ; CHECK-PWR9-NEXT: mr r3, r30 ; CHECK-PWR9-NEXT: bl callNonVoid ; CHECK-PWR9-NEXT: nop +; CHECK-PWR9-NEXT: mr r4, r3 ; CHECK-PWR9-NEXT: .LBB0_2: # %if.end -; CHECK-PWR9-NEXT: extsw r3, r3 +; CHECK-PWR9-NEXT: extsw r3, r4 ; CHECK-PWR9-NEXT: addi r1, r1, 48 ; CHECK-PWR9-NEXT: ld r0, 16(r1) ; CHECK-PWR9-NEXT: mtlr r0 @@ -49,8 +50,8 @@ ; CHECK-NEXT: addis r4, r2, a@toc@ha ; CHECK-NEXT: std r30, 112(r1) # 8-byte Folded Spill ; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: lwa r4, a@toc@l(r4) -; CHECK-NEXT: cmpld r4, r3 +; CHECK-NEXT: lwa r5, a@toc@l(r4) +; CHECK-NEXT: cmpld r5, r3 ; CHECK-NEXT: # implicit-def: $r3 ; CHECK-NEXT: bne cr0, .LBB0_2 ; CHECK-NEXT: # %bb.1: # %if.then @@ -102,9 +103,9 @@ ; CHECK-PWR9-NEXT: cmpldi r30, 0 ; CHECK-PWR9-NEXT: beq cr0, .LBB1_3 ; CHECK-PWR9-NEXT: # %bb.1: # %if.end -; CHECK-PWR9-NEXT: addis r4, r2, a@toc@ha -; CHECK-PWR9-NEXT: lwa r4, a@toc@l(r4) -; CHECK-PWR9-NEXT: cmpld r4, r30 +; CHECK-PWR9-NEXT: addis r5, r2, a@toc@ha +; CHECK-PWR9-NEXT: lwa r5, a@toc@l(r5) +; CHECK-PWR9-NEXT: cmpld r5, r30 ; CHECK-PWR9-NEXT: bne cr0, .LBB1_3 ; CHECK-PWR9-NEXT: # %bb.2: # %if.then2 ; CHECK-PWR9-NEXT: bl callVoid @@ -134,9 +135,9 @@ ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: beq cr0, .LBB1_3 ; CHECK-NEXT: # %bb.1: # %if.end -; CHECK-NEXT: addis r4, r2, a@toc@ha -; CHECK-NEXT: lwa r4, a@toc@l(r4) -; CHECK-NEXT: cmpld r4, r30 +; CHECK-NEXT: addis r5, r2, a@toc@ha +; CHECK-NEXT: lwa r5, a@toc@l(r5) +; CHECK-NEXT: cmpld r5, r30 ; CHECK-NEXT: bne cr0, .LBB1_3 ; CHECK-NEXT: # %bb.2: # %if.then2 ; CHECK-NEXT: bl callVoid Index: llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll =================================================================== --- 
llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll +++ llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll @@ -15,25 +15,22 @@ ; CHECK-LABEL: tail_dup_break_cfg: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr 0 -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r30, -16 -; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill ; CHECK-NEXT: std 0, 16(1) ; CHECK-NEXT: stdu 1, -48(1) -; CHECK-NEXT: mr 30, 3 -; CHECK-NEXT: andi. 3, 30, 1 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: andi. 4, 3, 1 ; CHECK-NEXT: bc 12, 1, .LBB0_3 ; CHECK-NEXT: # %bb.1: # %test2 -; CHECK-NEXT: andi. 3, 30, 2 +; CHECK-NEXT: andi. 3, 3, 2 ; CHECK-NEXT: bne 0, .LBB0_4 ; CHECK-NEXT: .LBB0_2: # %exit ; CHECK-NEXT: addi 1, 1, 48 ; CHECK-NEXT: ld 0, 16(1) ; CHECK-NEXT: mtlr 0 -; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB0_3: # %body1 +; CHECK-NEXT: std 3, 40(1) # 8-byte Folded Spill ; CHECK-NEXT: bl a ; CHECK-NEXT: nop ; CHECK-NEXT: bl a @@ -42,7 +39,8 @@ ; CHECK-NEXT: nop ; CHECK-NEXT: bl a ; CHECK-NEXT: nop -; CHECK-NEXT: andi. 3, 30, 2 +; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload +; CHECK-NEXT: andi. 3, 3, 2 ; CHECK-NEXT: beq 0, .LBB0_2 ; CHECK-NEXT: .LBB0_4: # %body2 ; CHECK-NEXT: bl b @@ -85,16 +83,14 @@ ; CHECK-LABEL: tail_dup_dont_break_cfg: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr 0 -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r30, -16 -; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill ; CHECK-NEXT: std 0, 16(1) ; CHECK-NEXT: stdu 1, -48(1) -; CHECK-NEXT: mr 30, 3 -; CHECK-NEXT: andi. 3, 30, 1 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: andi. 
4, 3, 1 ; CHECK-NEXT: bc 4, 1, .LBB1_2 ; CHECK-NEXT: # %bb.1: # %body1 +; CHECK-NEXT: std 3, 40(1) # 8-byte Folded Spill ; CHECK-NEXT: bl a ; CHECK-NEXT: nop ; CHECK-NEXT: bl a @@ -103,8 +99,9 @@ ; CHECK-NEXT: nop ; CHECK-NEXT: bl a ; CHECK-NEXT: nop +; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload ; CHECK-NEXT: .LBB1_2: # %test2 -; CHECK-NEXT: andi. 3, 30, 2 +; CHECK-NEXT: andi. 3, 3, 2 ; CHECK-NEXT: beq 0, .LBB1_4 ; CHECK-NEXT: # %bb.3: # %body2 ; CHECK-NEXT: bl b @@ -119,7 +116,6 @@ ; CHECK-NEXT: addi 1, 1, 48 ; CHECK-NEXT: ld 0, 16(1) ; CHECK-NEXT: mtlr 0 -; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; CHECK-NEXT: blr entry: br label %test1 @@ -159,14 +155,11 @@ ; CHECK-LABEL: tail_dup_no_succ: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr 0 -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r30, -16 -; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill ; CHECK-NEXT: std 0, 16(1) ; CHECK-NEXT: stdu 1, -48(1) -; CHECK-NEXT: mr 30, 3 -; CHECK-NEXT: andi. 3, 3, 1 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: andi. 4, 3, 1 ; CHECK-NEXT: bc 12, 1, .LBB2_3 ; CHECK-NEXT: .LBB2_1: # %v ; CHECK-NEXT: bl d @@ -177,21 +170,22 @@ ; CHECK-NEXT: addi 1, 1, 48 ; CHECK-NEXT: ld 0, 16(1) ; CHECK-NEXT: mtlr 0 -; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB2_3: # %bb -; CHECK-NEXT: andi. 3, 30, 2 +; CHECK-NEXT: andi. 4, 3, 2 ; CHECK-NEXT: bne 0, .LBB2_5 ; CHECK-NEXT: # %bb.4: # %succ -; CHECK-NEXT: andi. 3, 30, 4 +; CHECK-NEXT: andi. 3, 3, 4 ; CHECK-NEXT: beq 0, .LBB2_2 ; CHECK-NEXT: b .LBB2_1 ; CHECK-NEXT: .LBB2_5: # %c +; CHECK-NEXT: std 3, 40(1) # 8-byte Folded Spill ; CHECK-NEXT: bl c ; CHECK-NEXT: nop ; CHECK-NEXT: bl c ; CHECK-NEXT: nop -; CHECK-NEXT: andi. 3, 30, 4 +; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload +; CHECK-NEXT: andi. 
3, 3, 4 ; CHECK-NEXT: beq 0, .LBB2_2 ; CHECK-NEXT: b .LBB2_1 entry: Index: llvm/test/CodeGen/X86/atom-fixup-lea2.ll =================================================================== --- llvm/test/CodeGen/X86/atom-fixup-lea2.ll +++ llvm/test/CodeGen/X86/atom-fixup-lea2.ll @@ -32,17 +32,14 @@ define i32 @test() { ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushl %ebx -; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: pushl %edi -; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: leal -{{[0-9]+}}(%esp), %esp ; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset %esi, -16 -; CHECK-NEXT: .cfi_offset %edi, -12 -; CHECK-NEXT: .cfi_offset %ebx, -8 +; CHECK-NEXT: .cfi_offset %esi, -12 +; CHECK-NEXT: .cfi_offset %edi, -8 ; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, (%esp) ; CHECK-NEXT: calll getnode @@ -56,30 +53,29 @@ ; CHECK-NEXT: testl %edx, %edx ; CHECK-NEXT: jle .LBB0_6 ; CHECK-NEXT: # %bb.2: # %land.lhs.true2 -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi -; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: testl %esi, %esi ; CHECK-NEXT: jle .LBB0_6 ; CHECK-NEXT: # %bb.3: # %land.lhs.true4 +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi ; CHECK-NEXT: testl %esi, %esi ; CHECK-NEXT: jle .LBB0_6 ; CHECK-NEXT: # %bb.4: # %land.lhs.true7 -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx -; CHECK-NEXT: testl %ebx, %ebx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: jle .LBB0_6 ; CHECK-NEXT: # %bb.5: # %if.then -; CHECK-NEXT: leal (%esi,%edi), %esi +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; CHECK-NEXT: leal (%edx,%ecx), %edx -; CHECK-NEXT: leal (%esi,%ebx), %esi +; CHECK-NEXT: leal (%esi,%edi), %esi ; CHECK-NEXT: movl 
(%edx,%esi,4), %eax ; CHECK-NEXT: .LBB0_6: # %if.end ; CHECK-NEXT: leal {{[0-9]+}}(%esp), %esp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: popl %esi ; CHECK-NEXT: .cfi_def_cfa_offset 12 -; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %esi ; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: popl %ebx +; CHECK-NEXT: popl %edi ; CHECK-NEXT: .cfi_def_cfa_offset 4 ; CHECK-NEXT: retl entry: Index: llvm/test/CodeGen/X86/block-placement.ll =================================================================== --- llvm/test/CodeGen/X86/block-placement.ll +++ llvm/test/CodeGen/X86/block-placement.ll @@ -9,111 +9,114 @@ ; that is not expected to run. ; CHECK-LABEL: test_ifchains: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushl %ebx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: pushl %edi -; CHECK-NEXT: .cfi_def_cfa_offset 12 -; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %esi, -16 -; CHECK-NEXT: .cfi_offset %edi, -12 -; CHECK-NEXT: .cfi_offset %ebx, -8 -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi -; CHECK-NEXT: cmpl $2, 4(%ebx) +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: cmpl $2, 4(%edx) ; CHECK-NEXT: jae .LBB0_1 ; CHECK-NEXT: # %bb.2: # %else1 -; CHECK-NEXT: cmpl $3, 8(%ebx) +; CHECK-NEXT: cmpl $3, 8(%edx) ; CHECK-NEXT: jae .LBB0_3 ; CHECK-NEXT: .LBB0_4: # %else2 -; CHECK-NEXT: cmpl $4, 12(%ebx) +; CHECK-NEXT: cmpl $4, 12(%edx) ; CHECK-NEXT: jae .LBB0_5 ; CHECK-NEXT: .LBB0_6: # %else3 -; CHECK-NEXT: cmpl $5, 16(%ebx) +; CHECK-NEXT: cmpl $5, 16(%edx) ; CHECK-NEXT: jae .LBB0_7 ; CHECK-NEXT: .LBB0_8: # %else4 -; CHECK-NEXT: cmpl $4, 12(%ebx) +; CHECK-NEXT: cmpl $4, 12(%edx) ; CHECK-NEXT: jae .LBB0_9 ; CHECK-NEXT: .LBB0_10: # %exit -; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: popl %esi -; CHECK-NEXT: 
.cfi_def_cfa_offset 12 -; CHECK-NEXT: popl %edi -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: popl %ebx +; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 4 ; CHECK-NEXT: retl ; CHECK-NEXT: .LBB0_1: # %then1 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: subl $4, %esp ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 -; CHECK-NEXT: pushl %esi +; CHECK-NEXT: pushl %eax ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 ; CHECK-NEXT: pushl $1 ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 -; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %ecx ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 ; CHECK-NEXT: calll error +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: addl $16, %esp ; CHECK-NEXT: .cfi_adjust_cfa_offset -16 -; CHECK-NEXT: cmpl $3, 8(%ebx) +; CHECK-NEXT: cmpl $3, 8(%edx) ; CHECK-NEXT: jb .LBB0_4 ; CHECK-NEXT: .LBB0_3: # %then2 ; CHECK-NEXT: subl $4, %esp ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 -; CHECK-NEXT: pushl %esi +; CHECK-NEXT: pushl %eax ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 ; CHECK-NEXT: pushl $1 ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 -; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %ecx ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 ; CHECK-NEXT: calll error +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: addl $16, %esp ; CHECK-NEXT: .cfi_adjust_cfa_offset -16 -; CHECK-NEXT: cmpl $4, 12(%ebx) +; CHECK-NEXT: cmpl $4, 12(%edx) ; CHECK-NEXT: jb .LBB0_6 ; CHECK-NEXT: .LBB0_5: # %then3 ; CHECK-NEXT: subl $4, %esp ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 -; CHECK-NEXT: pushl %esi +; CHECK-NEXT: pushl %eax ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 ; CHECK-NEXT: pushl $1 ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 -; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %ecx ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 ; CHECK-NEXT: calll error +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), 
%ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: addl $16, %esp ; CHECK-NEXT: .cfi_adjust_cfa_offset -16 -; CHECK-NEXT: cmpl $5, 16(%ebx) +; CHECK-NEXT: cmpl $5, 16(%edx) ; CHECK-NEXT: jb .LBB0_8 ; CHECK-NEXT: .LBB0_7: # %then4 ; CHECK-NEXT: subl $4, %esp ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 -; CHECK-NEXT: pushl %esi +; CHECK-NEXT: pushl %eax ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 ; CHECK-NEXT: pushl $1 ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 -; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %ecx ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 ; CHECK-NEXT: calll error +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: addl $16, %esp ; CHECK-NEXT: .cfi_adjust_cfa_offset -16 -; CHECK-NEXT: cmpl $4, 12(%ebx) +; CHECK-NEXT: cmpl $4, 12(%edx) ; CHECK-NEXT: jb .LBB0_10 ; CHECK-NEXT: .LBB0_9: # %then5 ; CHECK-NEXT: subl $4, %esp ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 -; CHECK-NEXT: pushl %esi +; CHECK-NEXT: pushl %eax ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 ; CHECK-NEXT: pushl $1 ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 -; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %ecx ; CHECK-NEXT: .cfi_adjust_cfa_offset 4 ; CHECK-NEXT: calll error +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: addl $16, %esp ; CHECK-NEXT: .cfi_adjust_cfa_offset -16 -; CHECK-NEXT: jmp .LBB0_10 +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: %gep1 = getelementptr i32, i32* %a, i32 1 @@ -2224,28 +2227,25 @@ ; at the bottom. 
; CHECK-LABEL: test_cold_calls: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %esi, -8 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl 4(%eax), %esi -; CHECK-NEXT: cmpl $2, %esi +; CHECK-NEXT: movl 4(%eax), %ecx +; CHECK-NEXT: cmpl $2, %ecx ; CHECK-NEXT: jae .LBB20_1 ; CHECK-NEXT: # %bb.2: # %else -; CHECK-NEXT: movl 8(%eax), %esi -; CHECK-NEXT: .LBB20_3: # %exit -; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: addl $8, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: popl %esi +; CHECK-NEXT: movl 8(%eax), %eax +; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 4 ; CHECK-NEXT: retl ; CHECK-NEXT: .LBB20_1: # %then ; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: calll cold_function -; CHECK-NEXT: jmp .LBB20_3 +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: %gep1 = getelementptr i32, i32* %a, i32 1 @@ -2818,52 +2818,41 @@ ; loop chain .slow does not have afallthrough to .header. 
; CHECK-LABEL: not_rotate_if_extra_branch: ; CHECK: # %bb.0: # %.entry -; CHECK-NEXT: pushl %ebx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: pushl %edi -; CHECK-NEXT: .cfi_def_cfa_offset 12 -; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: subl $16, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset %esi, -16 -; CHECK-NEXT: .cfi_offset %edi, -12 -; CHECK-NEXT: .cfi_offset %ebx, -8 -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi -; CHECK-NEXT: leal (%edi,%edi), %esi -; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: leal (%ecx,%ecx), %eax +; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: .LBB30_1: # %.header ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cmpl $9000001, %ebx # imm = 0x895441 -; CHECK-NEXT: jge .LBB30_2 -; CHECK-NEXT: # %bb.3: # %.middle +; CHECK-NEXT: cmpl $9000001, %edx # imm = 0x895441 +; CHECK-NEXT: jge .LBB30_5 +; CHECK-NEXT: # %bb.2: # %.middle ; CHECK-NEXT: # in Loop: Header=BB30_1 Depth=1 -; CHECK-NEXT: testl $1023, %ebx # imm = 0x3FF +; CHECK-NEXT: testl $1023, %edx # imm = 0x3FF ; CHECK-NEXT: je .LBB30_4 -; CHECK-NEXT: .LBB30_5: # %.backedge +; CHECK-NEXT: .LBB30_3: # %.backedge ; CHECK-NEXT: # in Loop: Header=BB30_1 Depth=1 -; CHECK-NEXT: addl %ebx, %esi -; CHECK-NEXT: incl %ebx -; CHECK-NEXT: cmpl %edi, %ebx +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: incl %edx +; CHECK-NEXT: cmpl %ecx, %edx ; CHECK-NEXT: jl .LBB30_1 ; CHECK-NEXT: jmp .LBB30_6 ; CHECK-NEXT: .LBB30_4: # %.slow ; CHECK-NEXT: # in Loop: Header=BB30_1 Depth=1 -; CHECK-NEXT: movl %esi, (%esp) +; CHECK-NEXT: movl %eax, (%esp) +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: calll effect -; CHECK-NEXT: jmp .LBB30_5 -; CHECK-NEXT: .LBB30_2: # %.bailout -; CHECK-NEXT: incl %edi -; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%edx # 4-byte Reload +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: jmp .LBB30_3 +; CHECK-NEXT: .LBB30_5: # %.bailout +; CHECK-NEXT: incl %ecx +; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: .LBB30_6: # %.stop -; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: addl $16, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: popl %esi -; CHECK-NEXT: .cfi_def_cfa_offset 12 -; CHECK-NEXT: popl %edi -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: popl %ebx +; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 4 ; CHECK-NEXT: retl .entry: @@ -2905,66 +2894,58 @@ ; it introduce an extra btanch. ; CHECK-LABEL: not_rotate_if_extra_branch_regression: ; CHECK: # %bb.0: # %.entry -; CHECK-NEXT: pushl %ebx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: pushl %edi -; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: subl $16, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset %esi, -16 -; CHECK-NEXT: .cfi_offset %edi, -12 -; CHECK-NEXT: .cfi_offset %ebx, -8 -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi -; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: .LBB31_1: # %.first_header ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cmpl $9000000, %esi # imm = 0x895440 -; CHECK-NEXT: jg .LBB31_8 +; CHECK-NEXT: cmpl $9000000, %eax # imm = 0x895440 +; CHECK-NEXT: jg .LBB31_9 ; CHECK-NEXT: # %bb.2: # %.first_backedge ; CHECK-NEXT: # in Loop: Header=BB31_1 Depth=1 -; CHECK-NEXT: incl %esi -; CHECK-NEXT: cmpl %edi, %esi +; CHECK-NEXT: incl %eax +; CHECK-NEXT: cmpl %ecx, %eax ; CHECK-NEXT: jl .LBB31_1 ; CHECK-NEXT: .LBB31_3: # %.second_header ; CHECK-NEXT: 
# =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movl %eax, %ebx -; CHECK-NEXT: cmpl %edi, %eax +; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: cmpl %ecx, %edx ; CHECK-NEXT: jg .LBB31_7 ; CHECK-NEXT: # %bb.4: # %.second_middle ; CHECK-NEXT: # in Loop: Header=BB31_3 Depth=1 -; CHECK-NEXT: cmpl $9000001, %ebx # imm = 0x895441 +; CHECK-NEXT: cmpl $9000001, %esi # imm = 0x895441 ; CHECK-NEXT: jge .LBB31_5 ; CHECK-NEXT: .LBB31_6: # %.second_backedge ; CHECK-NEXT: # in Loop: Header=BB31_3 Depth=1 -; CHECK-NEXT: leal 1(%ebx), %eax -; CHECK-NEXT: cmpl $10000000, %ebx # imm = 0x989680 +; CHECK-NEXT: leal 1(%esi), %edx +; CHECK-NEXT: cmpl $10000000, %esi # imm = 0x989680 ; CHECK-NEXT: jl .LBB31_3 ; CHECK-NEXT: jmp .LBB31_7 ; CHECK-NEXT: .LBB31_5: # %.slow ; CHECK-NEXT: # in Loop: Header=BB31_3 Depth=1 -; CHECK-NEXT: movl %ebx, (%esp) +; CHECK-NEXT: movl %esi, (%esp) +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: calll effect +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; CHECK-NEXT: jmp .LBB31_6 ; CHECK-NEXT: .LBB31_7: # %.stop -; CHECK-NEXT: addl %ebx, %esi -; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: .LBB31_9: # %.bailout -; CHECK-NEXT: addl $16, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: popl %esi -; CHECK-NEXT: .cfi_def_cfa_offset 12 -; CHECK-NEXT: popl %edi +; CHECK-NEXT: addl %esi, %eax +; CHECK-NEXT: .LBB31_8: # %.stop +; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: popl %ebx +; CHECK-NEXT: popl %esi ; CHECK-NEXT: .cfi_def_cfa_offset 4 ; CHECK-NEXT: retl -; CHECK-NEXT: .LBB31_8: # %.bailout -; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .LBB31_9: # %.bailout +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: jmp .LBB31_9 +; CHECK-NEXT: jmp .LBB31_8 .entry: %sum.0 = shl nsw i32 %count, 1 br label %.first_header Index: llvm/test/CodeGen/X86/bmi.ll 
=================================================================== --- llvm/test/CodeGen/X86/bmi.ll +++ llvm/test/CodeGen/X86/bmi.ll @@ -1056,31 +1056,39 @@ define i32 @blsr32_branch(i32 %x) { ; X86-LABEL: blsr32_branch: ; X86: # %bb.0: -; X86-NEXT: pushl %esi +; X86-NEXT: pushl %eax ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %esi, -8 -; X86-NEXT: blsrl {{[0-9]+}}(%esp), %esi -; X86-NEXT: jne .LBB46_2 -; X86-NEXT: # %bb.1: +; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax +; X86-NEXT: je .LBB46_1 +; X86-NEXT: # %bb.2: +; X86-NEXT: popl %ecx +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +; X86-NEXT: .LBB46_1: +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: calll bar -; X86-NEXT: .LBB46_2: -; X86-NEXT: movl %esi, %eax -; X86-NEXT: popl %esi +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: popl %ecx ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X64-LABEL: blsr32_branch: ; X64: # %bb.0: -; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: blsrl %edi, %eax +; X64-NEXT: je .LBB46_1 +; X64-NEXT: # %bb.2: +; X64-NEXT: popq %rcx +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; X64-NEXT: .LBB46_1: ; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: .cfi_offset %rbx, -16 -; X64-NEXT: blsrl %edi, %ebx -; X64-NEXT: jne .LBB46_2 -; X64-NEXT: # %bb.1: +; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: callq bar -; X64-NEXT: .LBB46_2: -; X64-NEXT: movl %ebx, %eax -; X64-NEXT: popq %rbx +; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; X64-NEXT: popq %rcx ; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq %tmp = sub i32 %x, 1 @@ -1096,46 +1104,51 @@ define i64 @blsr64_branch(i64 %x) { ; X86-LABEL: blsr64_branch: ; X86: # %bb.0: -; X86-NEXT: pushl %edi -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: pushl %esi -; X86-NEXT: .cfi_def_cfa_offset 12 -; X86-NEXT: .cfi_offset %esi, -12 -; X86-NEXT: .cfi_offset %edi, -8 -; 
X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: subl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %eax, %esi -; X86-NEXT: addl $-1, %esi -; X86-NEXT: movl %ecx, %edi -; X86-NEXT: adcl $-1, %edi -; X86-NEXT: andl %eax, %esi -; X86-NEXT: andl %ecx, %edi -; X86-NEXT: movl %esi, %eax -; X86-NEXT: orl %edi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: addl $-1, %eax +; X86-NEXT: movl %esi, %edx +; X86-NEXT: adcl $-1, %edx +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: andl %esi, %edx +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: orl %edx, %ecx ; X86-NEXT: jne .LBB47_2 ; X86-NEXT: # %bb.1: +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, (%esp) # 4-byte Spill ; X86-NEXT: calll bar +; X86-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: .LBB47_2: -; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl %edi, %edx -; X86-NEXT: popl %esi +; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: popl %edi +; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X64-LABEL: blsr64_branch: ; X64: # %bb.0: -; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: blsrq %rdi, %rax +; X64-NEXT: je .LBB47_1 +; X64-NEXT: # %bb.2: +; X64-NEXT: popq %rcx +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; X64-NEXT: .LBB47_1: ; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: .cfi_offset %rbx, -16 -; X64-NEXT: blsrq %rdi, %rbx -; X64-NEXT: jne .LBB47_2 -; X64-NEXT: # %bb.1: +; X64-NEXT: movq %rax, (%rsp) # 8-byte Spill ; X64-NEXT: callq bar -; X64-NEXT: .LBB47_2: -; X64-NEXT: movq %rbx, %rax -; X64-NEXT: popq %rbx +; X64-NEXT: movq (%rsp), %rax # 8-byte Reload +; X64-NEXT: popq %rcx ; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq %tmp = sub i64 %x, 1 
@@ -1151,31 +1164,39 @@ define i32 @blsi32_branch(i32 %x) { ; X86-LABEL: blsi32_branch: ; X86: # %bb.0: -; X86-NEXT: pushl %esi +; X86-NEXT: pushl %eax ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %esi, -8 -; X86-NEXT: blsil {{[0-9]+}}(%esp), %esi -; X86-NEXT: jne .LBB48_2 -; X86-NEXT: # %bb.1: +; X86-NEXT: blsil {{[0-9]+}}(%esp), %eax +; X86-NEXT: je .LBB48_1 +; X86-NEXT: # %bb.2: +; X86-NEXT: popl %ecx +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +; X86-NEXT: .LBB48_1: +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: calll bar -; X86-NEXT: .LBB48_2: -; X86-NEXT: movl %esi, %eax -; X86-NEXT: popl %esi +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: popl %ecx ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X64-LABEL: blsi32_branch: ; X64: # %bb.0: -; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %rax ; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: .cfi_offset %rbx, -16 -; X64-NEXT: blsil %edi, %ebx -; X64-NEXT: jne .LBB48_2 -; X64-NEXT: # %bb.1: +; X64-NEXT: blsil %edi, %eax +; X64-NEXT: je .LBB48_1 +; X64-NEXT: # %bb.2: +; X64-NEXT: popq %rcx +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; X64-NEXT: .LBB48_1: +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: callq bar -; X64-NEXT: .LBB48_2: -; X64-NEXT: movl %ebx, %eax -; X64-NEXT: popq %rbx +; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; X64-NEXT: popq %rcx ; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq %tmp = sub i32 0, %x @@ -1191,46 +1212,51 @@ define i64 @blsi64_branch(i64 %x) { ; X86-LABEL: blsi64_branch: ; X86: # %bb.0: -; X86-NEXT: pushl %edi -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: pushl %esi -; X86-NEXT: .cfi_def_cfa_offset 12 -; X86-NEXT: .cfi_offset %esi, -12 -; X86-NEXT: .cfi_offset %edi, -8 -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: subl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 
16 +; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: movl %eax, %edi -; X86-NEXT: negl %edi -; X86-NEXT: sbbl %ecx, %esi -; X86-NEXT: andl %ecx, %esi -; X86-NEXT: andl %eax, %edi -; X86-NEXT: movl %edi, %eax -; X86-NEXT: orl %esi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: negl %eax +; X86-NEXT: sbbl %esi, %edx +; X86-NEXT: andl %esi, %edx +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: orl %edx, %ecx ; X86-NEXT: jne .LBB49_2 ; X86-NEXT: # %bb.1: +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: calll bar +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: .LBB49_2: -; X86-NEXT: movl %edi, %eax -; X86-NEXT: movl %esi, %edx -; X86-NEXT: popl %esi +; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: popl %edi +; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X64-LABEL: blsi64_branch: ; X64: # %bb.0: -; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: blsiq %rdi, %rax +; X64-NEXT: je .LBB49_1 +; X64-NEXT: # %bb.2: +; X64-NEXT: popq %rcx +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; X64-NEXT: .LBB49_1: ; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: .cfi_offset %rbx, -16 -; X64-NEXT: blsiq %rdi, %rbx -; X64-NEXT: jne .LBB49_2 -; X64-NEXT: # %bb.1: +; X64-NEXT: movq %rax, (%rsp) # 8-byte Spill ; X64-NEXT: callq bar -; X64-NEXT: .LBB49_2: -; X64-NEXT: movq %rbx, %rax -; X64-NEXT: popq %rbx +; X64-NEXT: movq (%rsp), %rax # 8-byte Reload +; X64-NEXT: popq %rcx ; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq %tmp = sub i64 0, %x Index: llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll =================================================================== --- 
llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll +++ llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll @@ -12,70 +12,67 @@ ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: pushq %r15 ; CHECK-NEXT: pushq %r14 -; CHECK-NEXT: pushq %r13 ; CHECK-NEXT: pushq %r12 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: movl %edx, %ebx -; CHECK-NEXT: movl %esi, %r12d -; CHECK-NEXT: movq %rdi, %r15 +; CHECK-NEXT: subq $16, %rsp +; CHECK-NEXT: movl %edx, %r14d +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movq %rdi, %rbp ; CHECK-NEXT: callq c -; CHECK-NEXT: movl %eax, %r13d -; CHECK-NEXT: movq %r15, %rdi +; CHECK-NEXT: movl %eax, %r15d +; CHECK-NEXT: movq %rbp, %rdi ; CHECK-NEXT: callq l ; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: je .LBB0_1 ; CHECK-NEXT: .LBB0_10: # %cleanup -; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: addq $16, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 -; CHECK-NEXT: popq %r13 ; CHECK-NEXT: popq %r14 ; CHECK-NEXT: popq %r15 ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB0_1: # %if.end -; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %ebx, %eax +; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: cmpl $0, {{.*}}(%rip) +; CHECK-NEXT: # implicit-def: $r12d ; CHECK-NEXT: # implicit-def: $ebx -; CHECK-NEXT: # implicit-def: $r14d ; CHECK-NEXT: je .LBB0_4 ; CHECK-NEXT: # %bb.2: # %if.then4 -; CHECK-NEXT: movslq %r12d, %rdi +; CHECK-NEXT: movslq %eax, %rdi ; CHECK-NEXT: callq m -; CHECK-NEXT: # implicit-def: $ebx +; CHECK-NEXT: # implicit-def: $r12d ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: .LBB0_3: # %r ; CHECK-NEXT: callq c -; CHECK-NEXT: movl %ebp, %r14d +; CHECK-NEXT: movl %ebp, %ebx ; CHECK-NEXT: .LBB0_4: # %if.end8 -; CHECK-NEXT: movl %ebx, %edi +; CHECK-NEXT: movl %r12d, %edi ; CHECK-NEXT: callq i ; CHECK-NEXT: movl %eax, %ebp -; CHECK-NEXT: orl %r14d, %ebp -; CHECK-NEXT: testl %r13d, %r13d +; CHECK-NEXT: orl %ebx, %ebp +; CHECK-NEXT: testl %r15d, %r15d ; 
CHECK-NEXT: je .LBB0_6 ; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: andl $4, %ebx +; CHECK-NEXT: andl $4, %r12d ; CHECK-NEXT: jmp .LBB0_3 ; CHECK-NEXT: .LBB0_6: # %if.end12 ; CHECK-NEXT: testl %ebp, %ebp ; CHECK-NEXT: je .LBB0_9 ; CHECK-NEXT: # %bb.7: # %if.then14 -; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: jmp .LBB0_10 ; CHECK-NEXT: .Ltmp0: # Block address taken ; CHECK-NEXT: .LBB0_8: # %if.then20.critedge ; CHECK-NEXT: movl {{.*}}(%rip), %edi -; CHECK-NEXT: movslq %eax, %rcx +; CHECK-NEXT: movslq %r14d, %rcx ; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: movq %r15, %rdx -; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: addq $16, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 -; CHECK-NEXT: popq %r13 ; CHECK-NEXT: popq %r14 ; CHECK-NEXT: popq %r15 ; CHECK-NEXT: popq %rbp Index: llvm/test/CodeGen/X86/cgp-usubo.ll =================================================================== --- llvm/test/CodeGen/X86/cgp-usubo.ll +++ llvm/test/CodeGen/X86/cgp-usubo.ll @@ -162,36 +162,34 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, i64* %p, i1 %cond) nounwind { ; CHECK-LABEL: usubo_ult_cmp_dominates_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: pushq %r15 -; CHECK-NEXT: pushq %r14 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: movl %ecx, %ebp -; CHECK-NEXT: testb $1, %bpl +; CHECK-NEXT: subq $32, %rsp +; CHECK-NEXT: testb $1, %cl ; CHECK-NEXT: je .LBB9_2 ; CHECK-NEXT: # %bb.1: # %t -; CHECK-NEXT: movq %rdx, %r14 -; CHECK-NEXT: movq %rsi, %r15 -; CHECK-NEXT: movq %rdi, %rbx -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: cmpq %rsi, %rbx -; CHECK-NEXT: setb %dil +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: setb %al +; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) 
# 8-byte Spill +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movq %rsi, %rbx ; CHECK-NEXT: callq call -; CHECK-NEXT: subq %r15, %rbx +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; CHECK-NEXT: subq %rbx, %rsi ; CHECK-NEXT: jae .LBB9_2 ; CHECK-NEXT: # %bb.4: # %end ; CHECK-NEXT: setb %al -; CHECK-NEXT: movq %rbx, (%r14) +; CHECK-NEXT: movq %rsi, (%rdx) ; CHECK-NEXT: jmp .LBB9_3 ; CHECK-NEXT: .LBB9_2: # %f -; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: .LBB9_3: # %f -; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: addq $32, %rsp ; CHECK-NEXT: popq %rbx -; CHECK-NEXT: popq %r14 -; CHECK-NEXT: popq %r15 -; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq entry: br i1 %cond, label %t, label %f Index: llvm/test/CodeGen/X86/csr-split.ll =================================================================== --- llvm/test/CodeGen/X86/csr-split.ll +++ llvm/test/CodeGen/X86/csr-split.ll @@ -75,7 +75,6 @@ ; CHECK-NEXT: testq %rdi, %rdi ; CHECK-NEXT: je .LBB1_2 ; CHECK-NEXT: # %bb.1: # %if.end -; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: movslq {{.*}}(%rip), %rax ; CHECK-NEXT: cmpq %rdi, %rax ; CHECK-NEXT: je .LBB1_3 @@ -86,6 +85,7 @@ ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB1_3: # %if.then2 ; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: callq callVoid ; CHECK-NEXT: movq %rbx, %rdi ; CHECK-NEXT: popq %rbx Index: llvm/test/CodeGen/X86/fp128-cast.ll =================================================================== --- llvm/test/CodeGen/X86/fp128-cast.ll +++ llvm/test/CodeGen/X86/fp128-cast.ll @@ -1099,18 +1099,18 @@ ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi ; X32-NEXT: subl $36, %esp -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi 
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: cmpl $50001, {{[0-9]+}}(%esp) # imm = 0xC351 ; X32-NEXT: jl .LBB22_4 ; X32-NEXT: # %bb.1: # %if.then -; X32-NEXT: pushl %eax ; X32-NEXT: pushl %ecx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %edx +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi ; X32-NEXT: calll __trunctfdf2 ; X32-NEXT: addl $16, %esp ; X32-NEXT: fstpl {{[0-9]+}}(%esp) @@ -1129,16 +1129,16 @@ ; X32-NEXT: fstpl {{[0-9]+}}(%esp) ; X32-NEXT: calll __extenddftf2 ; X32-NEXT: addl $12, %esp -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: .LBB22_4: # %cleanup -; X32-NEXT: movl %edx, (%esi) -; X32-NEXT: movl %edi, 4(%esi) -; X32-NEXT: movl %ecx, 8(%esi) -; X32-NEXT: movl %eax, 12(%esi) -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %esi, (%eax) +; X32-NEXT: movl %edi, 4(%eax) +; X32-NEXT: movl %edx, 8(%eax) +; X32-NEXT: movl %ecx, 12(%eax) ; X32-NEXT: addl $36, %esp ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi Index: llvm/test/CodeGen/X86/peep-test-4.ll =================================================================== --- llvm/test/CodeGen/X86/peep-test-4.ll +++ llvm/test/CodeGen/X86/peep-test-4.ll @@ -263,15 +263,17 @@ define void @testCTZ3(i32 %v) nounwind { ; CHECK-LABEL: testCTZ3: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: tzcntl %edi, %ebx -; CHECK-NEXT: jae .LBB13_2 -; CHECK-NEXT: # %bb.1: # %bb -; CHECK-NEXT: movl %ebx, %edi +; CHECK-NEXT: tzcntl %edi, %edi +; CHECK-NEXT: jb .LBB13_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: jmp foo32 # TAILCALL +; CHECK-NEXT: .LBB13_1: # %bb +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload ; 
CHECK-NEXT: callq foo -; CHECK-NEXT: .LBB13_2: # %return -; CHECK-NEXT: movl %ebx, %edi -; CHECK-NEXT: popq %rbx +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: jmp foo32 # TAILCALL %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true) %cmp = icmp ne i32 %v, 0 Index: llvm/test/CodeGen/X86/ragreedy-bug.ll =================================================================== --- llvm/test/CodeGen/X86/ragreedy-bug.ll +++ llvm/test/CodeGen/X86/ragreedy-bug.ll @@ -23,245 +23,243 @@ define fastcc i32 @prune_match(%struct.Connector_struct* nocapture readonly %a, %struct.Connector_struct* nocapture readonly %b) #9 { ; CHECK-LABEL: prune_match: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: pushq %r15 -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: pushq %r14 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: pushq %r13 -; CHECK-NEXT: .cfi_def_cfa_offset 40 -; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: subq $40, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 56 -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset %rbx, -56 -; CHECK-NEXT: .cfi_offset %r12, -48 -; CHECK-NEXT: .cfi_offset %r13, -40 -; CHECK-NEXT: .cfi_offset %r14, -32 -; CHECK-NEXT: .cfi_offset %r15, -24 -; CHECK-NEXT: .cfi_offset %rbp, -16 ; CHECK-NEXT: movzwl (%rdi), %eax ; CHECK-NEXT: cmpw (%rsi), %ax -; CHECK-NEXT: jne LBB0_20 +; CHECK-NEXT: jne LBB0_46 ; CHECK-NEXT: ## %bb.1: ## %if.end -; CHECK-NEXT: movq %rdi, %r12 -; CHECK-NEXT: movb 4(%rdi), %cl -; CHECK-NEXT: movb 4(%rsi), %dl -; CHECK-NEXT: movq 16(%rdi), %r14 -; CHECK-NEXT: movq 16(%rsi), %r15 -; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: movq __DefaultRuneLocale@{{.*}}(%rip), %rbp -; CHECK-NEXT: movl $32768, %r13d ## imm = 0x8000 +; CHECK-NEXT: movb 4(%rdi), %r9b +; CHECK-NEXT: movb 4(%rsi), %r10b +; CHECK-NEXT: movq 16(%rdi), %r11 +; 
CHECK-NEXT: movq 16(%rsi), %r8 +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: movq __DefaultRuneLocale@{{.*}}(%rip), %rsi +; CHECK-NEXT: movl $32768, %ecx ## imm = 0x8000 ; CHECK-NEXT: LBB0_2: ## %while.cond ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movsbq (%r14,%rbx), %rdi -; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: movsbq (%r11,%rdx), %rax +; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: js LBB0_8 ; CHECK-NEXT: ## %bb.3: ## %cond.true.i.i ; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: movl 60(%rbp,%rdi,4), %eax -; CHECK-NEXT: andl %r13d, %eax +; CHECK-NEXT: movl 60(%rsi,%rax,4), %eax +; CHECK-NEXT: andl %ecx, %eax ; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: jne LBB0_6 ; CHECK-NEXT: LBB0_4: ## %lor.rhs ; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: movsbq (%rbx,%r15), %rdi -; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: movsbq (%rdx,%r8), %rax +; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: js LBB0_9 ; CHECK-NEXT: ## %bb.5: ## %cond.true.i.i217 ; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: movl 60(%rbp,%rdi,4), %eax -; CHECK-NEXT: andl %r13d, %eax +; CHECK-NEXT: movl 60(%rsi,%rax,4), %eax +; CHECK-NEXT: andl %ecx, %eax ; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: je LBB0_10 ; CHECK-NEXT: LBB0_6: ## %while.body ; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: movzbl (%rbx,%r14), %eax -; CHECK-NEXT: cmpb (%rbx,%r15), %al -; CHECK-NEXT: jne LBB0_20 +; CHECK-NEXT: movzbl (%rdx,%r11), %eax +; CHECK-NEXT: cmpb (%rdx,%r8), %al +; CHECK-NEXT: jne LBB0_46 ; CHECK-NEXT: ## %bb.7: ## %if.end17 ; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: incq %rbx +; CHECK-NEXT: incq %rdx ; CHECK-NEXT: jmp LBB0_2 ; CHECK-NEXT: LBB0_8: ## %cond.false.i.i ; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: movl $32768, %esi ## imm = 0x8000 -; CHECK-NEXT: ## kill: def $edi killed $edi killed $rdi -; CHECK-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill -; CHECK-NEXT: movb %dl, 
{{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill +; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill +; CHECK-NEXT: movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill ; CHECK-NEXT: callq ___maskrune -; CHECK-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx ## 1-byte Folded Reload -; CHECK-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx ## 1-byte Folded Reload +; CHECK-NEXT: movl $32768, %ecx ## imm = 0x8000 +; CHECK-NEXT: movq __DefaultRuneLocale@{{.*}}(%rip), %rsi +; CHECK-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r10d ## 1-byte Folded Reload +; CHECK-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r9d ## 1-byte Folded Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 ## 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload ; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: je LBB0_4 ; CHECK-NEXT: jmp LBB0_6 ; CHECK-NEXT: LBB0_9: ## %cond.false.i.i219 ; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: movl $32768, %esi ## imm = 0x8000 -; CHECK-NEXT: ## kill: def $edi killed $edi killed $rdi -; CHECK-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill -; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill +; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill +; CHECK-NEXT: movb %r10b, 
{{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill ; CHECK-NEXT: callq ___maskrune -; CHECK-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %edx ## 1-byte Folded Reload -; CHECK-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx ## 1-byte Folded Reload +; CHECK-NEXT: movl $32768, %ecx ## imm = 0x8000 +; CHECK-NEXT: movq __DefaultRuneLocale@{{.*}}(%rip), %rsi +; CHECK-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r10d ## 1-byte Folded Reload +; CHECK-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r9d ## 1-byte Folded Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 ## 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload ; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: jne LBB0_6 ; CHECK-NEXT: LBB0_10: ## %while.end -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: orb %cl, %al -; CHECK-NEXT: jne LBB0_28 +; CHECK-NEXT: movl %r10d, %eax +; CHECK-NEXT: orb %r9b, %al +; CHECK-NEXT: jne LBB0_26 ; CHECK-NEXT: ## %bb.11: ## %if.then23 -; CHECK-NEXT: movq 16(%r12), %rdx -; CHECK-NEXT: cmpb $83, (%rdx) -; CHECK-NEXT: movb (%r14,%rbx), %cl -; CHECK-NEXT: je LBB0_22 +; CHECK-NEXT: movq 16(%rdi), %rcx +; CHECK-NEXT: cmpb $83, (%rcx) +; CHECK-NEXT: movb (%r11,%rdx), %sil +; CHECK-NEXT: je LBB0_21 ; CHECK-NEXT: LBB0_12: ## %while.cond59.preheader ; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: LBB0_13: ## %while.cond59.preheader -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: je LBB0_21 +; CHECK-NEXT: testb %sil, %sil +; CHECK-NEXT: je LBB0_25 ; CHECK-NEXT: LBB0_14: ## %land.rhs ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movzbl (%r15,%rbx), %edx +; CHECK-NEXT: movzbl (%r8,%rdx), %ecx ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je LBB0_21 +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: je LBB0_25 ; CHECK-NEXT: ## %bb.15: ## %while.body66 ; CHECK-NEXT: ## in Loop: Header=BB0_14 Depth=1 -; CHECK-NEXT: cmpb $42, %cl +; 
CHECK-NEXT: cmpb $42, %sil ; CHECK-NEXT: je LBB0_19 ; CHECK-NEXT: ## %bb.16: ## %while.body66 ; CHECK-NEXT: ## in Loop: Header=BB0_14 Depth=1 -; CHECK-NEXT: cmpb $42, %dl +; CHECK-NEXT: cmpb $42, %cl ; CHECK-NEXT: je LBB0_19 ; CHECK-NEXT: ## %bb.17: ## %lor.lhs.false74 ; CHECK-NEXT: ## in Loop: Header=BB0_14 Depth=1 ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpb %dl, %cl -; CHECK-NEXT: jne LBB0_21 +; CHECK-NEXT: cmpb %cl, %sil +; CHECK-NEXT: jne LBB0_25 ; CHECK-NEXT: ## %bb.18: ## %lor.lhs.false74 ; CHECK-NEXT: ## in Loop: Header=BB0_14 Depth=1 -; CHECK-NEXT: cmpb $94, %cl -; CHECK-NEXT: je LBB0_21 +; CHECK-NEXT: cmpb $94, %sil +; CHECK-NEXT: je LBB0_25 ; CHECK-NEXT: LBB0_19: ## %if.then83 ; CHECK-NEXT: ## in Loop: Header=BB0_14 Depth=1 -; CHECK-NEXT: movzbl 1(%r14,%rbx), %ecx -; CHECK-NEXT: incq %rbx -; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: movzbl 1(%r11,%rdx), %esi +; CHECK-NEXT: incq %rdx +; CHECK-NEXT: testb %sil, %sil ; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: jne LBB0_14 -; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_22: ## %land.lhs.true28 +; CHECK-NEXT: jmp LBB0_25 +; CHECK-NEXT: LBB0_21: ## %land.lhs.true28 ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: je LBB0_21 -; CHECK-NEXT: ## %bb.23: ## %land.lhs.true28 -; CHECK-NEXT: cmpb $112, %cl +; CHECK-NEXT: testb %sil, %sil +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: ## %bb.22: ## %land.lhs.true28 +; CHECK-NEXT: cmpb $112, %sil ; CHECK-NEXT: jne LBB0_14 -; CHECK-NEXT: ## %bb.24: ## %land.lhs.true35 -; CHECK-NEXT: cmpb $112, (%r15,%rbx) +; CHECK-NEXT: ## %bb.23: ## %land.lhs.true35 +; CHECK-NEXT: cmpb $112, (%r8,%rdx) ; CHECK-NEXT: jne LBB0_14 -; CHECK-NEXT: ## %bb.25: ## %land.lhs.true43 -; CHECK-NEXT: movq %r14, %rsi -; CHECK-NEXT: subq %rdx, %rsi -; CHECK-NEXT: addq %rbx, %rsi -; CHECK-NEXT: cmpq $1, %rsi -; CHECK-NEXT: je LBB0_21 -; CHECK-NEXT: ## %bb.26: ## %lor.lhs.false47 -; CHECK-NEXT: cmpq $2, %rsi -; CHECK-NEXT: jne LBB0_12 -; CHECK-NEXT: ## %bb.27: ## %land.lhs.true52 -; 
CHECK-NEXT: cmpb $73, -1(%r14,%rbx) -; CHECK-NEXT: jne LBB0_13 -; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_28: ## %if.else88 -; CHECK-NEXT: cmpb $1, %cl -; CHECK-NEXT: jne LBB0_37 -; CHECK-NEXT: ## %bb.29: ## %if.else88 -; CHECK-NEXT: cmpb $2, %dl -; CHECK-NEXT: jne LBB0_37 -; CHECK-NEXT: ## %bb.30: ## %while.cond95.preheader -; CHECK-NEXT: movb (%r14,%rbx), %cl +; CHECK-NEXT: ## %bb.24: ## %land.lhs.true43 +; CHECK-NEXT: movq %r11, %rdi +; CHECK-NEXT: subq %rcx, %rdi +; CHECK-NEXT: addq %rdx, %rdi +; CHECK-NEXT: cmpq $1, %rdi +; CHECK-NEXT: jne LBB0_44 +; CHECK-NEXT: LBB0_25: ## %return +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: retq +; CHECK-NEXT: LBB0_26: ## %if.else88 +; CHECK-NEXT: cmpb $1, %r9b +; CHECK-NEXT: jne LBB0_35 +; CHECK-NEXT: ## %bb.27: ## %if.else88 +; CHECK-NEXT: cmpb $2, %r10b +; CHECK-NEXT: jne LBB0_35 +; CHECK-NEXT: ## %bb.28: ## %while.cond95.preheader +; CHECK-NEXT: movb (%r11,%rdx), %sil ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: jne LBB0_32 -; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_31: ## %if.then117 -; CHECK-NEXT: ## in Loop: Header=BB0_32 Depth=1 -; CHECK-NEXT: movzbl 1(%r14,%rbx), %ecx -; CHECK-NEXT: incq %rbx -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: je LBB0_21 -; CHECK-NEXT: LBB0_32: ## %land.rhs99 +; CHECK-NEXT: testb %sil, %sil +; CHECK-NEXT: jne LBB0_30 +; CHECK-NEXT: jmp LBB0_25 +; CHECK-NEXT: LBB0_29: ## %if.then117 +; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=1 +; CHECK-NEXT: movzbl 1(%r11,%rdx), %esi +; CHECK-NEXT: incq %rdx +; CHECK-NEXT: testb %sil, %sil +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: LBB0_30: ## %land.rhs99 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movzbl (%r15,%rbx), %edx -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je LBB0_21 +; CHECK-NEXT: movzbl (%r8,%rdx), %ecx +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: ## %bb.31: ## %while.body104 +; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=1 +; CHECK-NEXT: cmpb %cl, %sil +; 
CHECK-NEXT: je LBB0_29 +; CHECK-NEXT: ## %bb.32: ## %while.body104 +; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=1 +; CHECK-NEXT: cmpb $42, %sil +; CHECK-NEXT: je LBB0_29 ; CHECK-NEXT: ## %bb.33: ## %while.body104 -; CHECK-NEXT: ## in Loop: Header=BB0_32 Depth=1 -; CHECK-NEXT: cmpb %dl, %cl -; CHECK-NEXT: je LBB0_31 -; CHECK-NEXT: ## %bb.34: ## %while.body104 -; CHECK-NEXT: ## in Loop: Header=BB0_32 Depth=1 -; CHECK-NEXT: cmpb $42, %cl -; CHECK-NEXT: je LBB0_31 -; CHECK-NEXT: ## %bb.35: ## %while.body104 -; CHECK-NEXT: ## in Loop: Header=BB0_32 Depth=1 -; CHECK-NEXT: cmpb $94, %dl -; CHECK-NEXT: je LBB0_31 -; CHECK-NEXT: LBB0_20: +; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=1 +; CHECK-NEXT: cmpb $94, %cl +; CHECK-NEXT: je LBB0_29 +; CHECK-NEXT: LBB0_46: ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: LBB0_21: ## %return -; CHECK-NEXT: addq $8, %rsp -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: popq %r12 -; CHECK-NEXT: popq %r13 -; CHECK-NEXT: popq %r14 -; CHECK-NEXT: popq %r15 -; CHECK-NEXT: popq %rbp +; CHECK-NEXT: addq $40, %rsp ; CHECK-NEXT: retq -; CHECK-NEXT: LBB0_37: ## %if.else123 +; CHECK-NEXT: LBB0_35: ## %if.else123 ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpb $1, %dl -; CHECK-NEXT: jne LBB0_21 -; CHECK-NEXT: ## %bb.38: ## %if.else123 -; CHECK-NEXT: cmpb $2, %cl -; CHECK-NEXT: jne LBB0_21 -; CHECK-NEXT: ## %bb.39: ## %while.cond130.preheader -; CHECK-NEXT: movb (%r14,%rbx), %cl -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: jne LBB0_41 -; CHECK-NEXT: jmp LBB0_45 -; CHECK-NEXT: LBB0_40: ## %if.then152 -; CHECK-NEXT: ## in Loop: Header=BB0_41 Depth=1 -; CHECK-NEXT: movzbl 1(%r14,%rbx), %ecx -; CHECK-NEXT: incq %rbx -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: je LBB0_45 -; CHECK-NEXT: LBB0_41: ## %land.rhs134 +; CHECK-NEXT: cmpb $1, %r10b +; CHECK-NEXT: jne LBB0_25 +; CHECK-NEXT: ## %bb.36: ## %if.else123 +; CHECK-NEXT: cmpb $2, %r9b +; CHECK-NEXT: jne LBB0_25 +; CHECK-NEXT: ## %bb.37: ## %while.cond130.preheader +; CHECK-NEXT: movb (%r11,%rdx), %sil +; 
CHECK-NEXT: testb %sil, %sil +; CHECK-NEXT: jne LBB0_39 +; CHECK-NEXT: jmp LBB0_43 +; CHECK-NEXT: LBB0_38: ## %if.then152 +; CHECK-NEXT: ## in Loop: Header=BB0_39 Depth=1 +; CHECK-NEXT: movzbl 1(%r11,%rdx), %esi +; CHECK-NEXT: incq %rdx +; CHECK-NEXT: testb %sil, %sil +; CHECK-NEXT: je LBB0_43 +; CHECK-NEXT: LBB0_39: ## %land.rhs134 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movzbl (%r15,%rbx), %edx -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je LBB0_45 +; CHECK-NEXT: movzbl (%r8,%rdx), %ecx +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: je LBB0_43 +; CHECK-NEXT: ## %bb.40: ## %while.body139 +; CHECK-NEXT: ## in Loop: Header=BB0_39 Depth=1 +; CHECK-NEXT: cmpb %cl, %sil +; CHECK-NEXT: je LBB0_38 +; CHECK-NEXT: ## %bb.41: ## %while.body139 +; CHECK-NEXT: ## in Loop: Header=BB0_39 Depth=1 +; CHECK-NEXT: cmpb $42, %cl +; CHECK-NEXT: je LBB0_38 ; CHECK-NEXT: ## %bb.42: ## %while.body139 -; CHECK-NEXT: ## in Loop: Header=BB0_41 Depth=1 -; CHECK-NEXT: cmpb %dl, %cl -; CHECK-NEXT: je LBB0_40 -; CHECK-NEXT: ## %bb.43: ## %while.body139 -; CHECK-NEXT: ## in Loop: Header=BB0_41 Depth=1 -; CHECK-NEXT: cmpb $42, %dl -; CHECK-NEXT: je LBB0_40 -; CHECK-NEXT: ## %bb.44: ## %while.body139 -; CHECK-NEXT: ## in Loop: Header=BB0_41 Depth=1 -; CHECK-NEXT: cmpb $94, %cl -; CHECK-NEXT: je LBB0_40 -; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_45: +; CHECK-NEXT: ## in Loop: Header=BB0_39 Depth=1 +; CHECK-NEXT: cmpb $94, %sil +; CHECK-NEXT: je LBB0_38 +; CHECK-NEXT: jmp LBB0_25 +; CHECK-NEXT: LBB0_43: ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: jmp LBB0_21 +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: retq +; CHECK-NEXT: LBB0_44: ## %lor.lhs.false47 +; CHECK-NEXT: cmpq $2, %rdi +; CHECK-NEXT: jne LBB0_12 +; CHECK-NEXT: ## %bb.45: ## %land.lhs.true52 +; CHECK-NEXT: cmpb $73, -1(%r11,%rdx) +; CHECK-NEXT: jne LBB0_13 +; CHECK-NEXT: jmp LBB0_25 entry: %label56 = bitcast %struct.Connector_struct* %a to i16* %0 = load i16, i16* %label56, align 2 Index: 
llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll =================================================================== --- llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +++ llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -68,7 +68,7 @@ ; CHECK-NEXT: je LBB0_55 ; CHECK-NEXT: ## %bb.6: ## %SyTime.exit2720 ; CHECK-NEXT: movq %rdx, %rbx -; CHECK-NEXT: movq %rdi, %rbp +; CHECK-NEXT: movq %rdi, %r14 ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: cmpq %rax, %rcx @@ -78,11 +78,10 @@ ; CHECK-NEXT: movl $32, %esi ; CHECK-NEXT: callq _memset ; CHECK-NEXT: LBB0_8: ## %while.body.preheader -; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: imulq $1040, %rbx, %rax ## imm = 0x410 ; CHECK-NEXT: movq _syBuf@{{.*}}(%rip), %rcx ; CHECK-NEXT: leaq 8(%rcx,%rax), %rbx -; CHECK-NEXT: movl $1, %r15d +; CHECK-NEXT: movl $1, %ebp ; CHECK-NEXT: movq _syCTRO@{{.*}}(%rip), %rax ; CHECK-NEXT: movb $1, %cl ; CHECK-NEXT: .p2align 4, 0x90 @@ -92,69 +91,70 @@ ; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: jne LBB0_9 ; CHECK-NEXT: ## %bb.10: ## %do.end -; CHECK-NEXT: xorl %r14d, %r14d -; CHECK-NEXT: testb %r14b, %r14b +; CHECK-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: xorl %r13d, %r13d +; CHECK-NEXT: testb %r13b, %r13b ; CHECK-NEXT: jne LBB0_11 ; CHECK-NEXT: ## %bb.12: ## %while.body200.preheader -; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: xorl %r12d, %r12d +; CHECK-NEXT: leaq {{.*}}(%rip), %rdx ; CHECK-NEXT: leaq {{.*}}(%rip), %rsi -; CHECK-NEXT: leaq {{.*}}(%rip), %rdi -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: xorl %r13d, %r13d +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: xorl %r14d, %r14d ; CHECK-NEXT: jmp LBB0_13 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_20: ## %sw.bb256 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl %r14d, %r13d +; CHECK-NEXT: movl %r13d, %r14d ; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge ; 
CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: decl %r15d -; CHECK-NEXT: testl %r15d, %r15d -; CHECK-NEXT: movl %r13d, %r14d +; CHECK-NEXT: decl %ebp +; CHECK-NEXT: testl %ebp, %ebp +; CHECK-NEXT: movl %r14d, %r13d ; CHECK-NEXT: jle LBB0_22 ; CHECK-NEXT: LBB0_13: ## %while.body200 ; CHECK-NEXT: ## =>This Loop Header: Depth=1 ; CHECK-NEXT: ## Child Loop BB0_30 Depth 2 ; CHECK-NEXT: ## Child Loop BB0_38 Depth 2 -; CHECK-NEXT: leal -268(%r14), %eax +; CHECK-NEXT: leal -268(%r13), %eax ; CHECK-NEXT: cmpl $105, %eax ; CHECK-NEXT: ja LBB0_14 ; CHECK-NEXT: ## %bb.56: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movslq (%rdi,%rax,4), %rax -; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: movslq (%rsi,%rax,4), %rax +; CHECK-NEXT: addq %rsi, %rax ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: movl %r14d, %r13d +; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: movl %r13d, %r14d ; CHECK-NEXT: jne LBB0_21 ; CHECK-NEXT: jmp LBB0_55 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_14: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leal 1(%r14), %eax +; CHECK-NEXT: leal 1(%r13), %eax ; CHECK-NEXT: cmpl $21, %eax ; CHECK-NEXT: ja LBB0_20 ; CHECK-NEXT: ## %bb.15: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $-1, %r13d -; CHECK-NEXT: movslq (%rsi,%rax,4), %rax -; CHECK-NEXT: addq %rsi, %rax +; CHECK-NEXT: movl $-1, %r14d +; CHECK-NEXT: movslq (%rdx,%rax,4), %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $1, %r13d +; CHECK-NEXT: movl $1, %r14d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_26: ## %sw.bb474 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: ## implicit-def: 
$r12 +; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: ## implicit-def: $r15 ; CHECK-NEXT: jne LBB0_34 ; CHECK-NEXT: ## %bb.27: ## %do.body479.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: ## implicit-def: $r12 +; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: ## implicit-def: $r15 ; CHECK-NEXT: jne LBB0_34 ; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 @@ -165,8 +165,8 @@ ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_32: ## %do.body479.backedge ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 -; CHECK-NEXT: leaq 1(%r12), %rax -; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: leaq 1(%r15), %rax +; CHECK-NEXT: testb %r12b, %r12b ; CHECK-NEXT: je LBB0_33 ; CHECK-NEXT: ## %bb.29: ## %land.rhs485 ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 @@ -175,15 +175,14 @@ ; CHECK-NEXT: LBB0_30: ## %cond.true.i.i2780 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 -; CHECK-NEXT: movq %rax, %r12 -; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: movq %rax, %r15 +; CHECK-NEXT: testb %r12b, %r12b ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: ## %bb.31: ## %lor.rhs500 ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 ; CHECK-NEXT: movl $256, %esi ## imm = 0x100 ; CHECK-NEXT: callq ___maskrune -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: testb %r12b, %r12b ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: jmp LBB0_34 ; CHECK-NEXT: LBB0_45: ## %sw.bb1134 @@ -193,23 +192,23 @@ ; CHECK-NEXT: cmpq %rax, %rcx ; CHECK-NEXT: jb LBB0_55 ; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: movl $268, %r13d ## imm = 0x10C +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: movl $268, %r14d ## imm = 0x10C ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_19: ## %sw.bb243 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $2, %r13d +; 
CHECK-NEXT: movl $2, %r14d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_40: ## %sw.bb566 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $20, %r13d +; CHECK-NEXT: movl $20, %r14d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_33: ## %if.end517.loopexitsplit ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: incq %r12 +; CHECK-NEXT: incq %r15 ; CHECK-NEXT: LBB0_34: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leal -324(%r13), %eax +; CHECK-NEXT: leal -324(%r14), %eax ; CHECK-NEXT: cmpl $59, %eax ; CHECK-NEXT: ja LBB0_35 ; CHECK-NEXT: ## %bb.57: ## %if.end517 @@ -219,11 +218,11 @@ ; CHECK-NEXT: jb LBB0_38 ; CHECK-NEXT: LBB0_35: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $11, %r13d +; CHECK-NEXT: cmpl $11, %r14d ; CHECK-NEXT: je LBB0_38 ; CHECK-NEXT: ## %bb.36: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $24, %r13d +; CHECK-NEXT: cmpl $24, %r14d ; CHECK-NEXT: je LBB0_38 ; CHECK-NEXT: ## %bb.37: ## %if.then532 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 @@ -233,15 +232,15 @@ ; CHECK-NEXT: LBB0_38: ## %for.cond534 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 -; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: testb %r12b, %r12b ; CHECK-NEXT: jne LBB0_38 ; CHECK-NEXT: ## %bb.39: ## %for.cond542.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: movb $0, (%r12) -; CHECK-NEXT: movl %r14d, %r13d +; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: movb $0, (%r15) +; CHECK-NEXT: movl %r13d, %r14d +; CHECK-NEXT: leaq {{.*}}(%rip), %rdx ; CHECK-NEXT: leaq {{.*}}(%rip), %rsi -; CHECK-NEXT: leaq {{.*}}(%rip), %rdi ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_42: ## %while.cond864 @@ -256,15 +255,15 @@ ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: jmp LBB0_25 ; CHECK-NEXT: LBB0_11: -; CHECK-NEXT: 
xorl %ebp, %ebp -; CHECK-NEXT: xorl %r13d, %r13d +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: xorl %r14d, %r14d ; CHECK-NEXT: LBB0_22: ## %while.end1465 -; CHECK-NEXT: incl %r13d -; CHECK-NEXT: cmpl $16, %r13d +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: cmpl $16, %r14d ; CHECK-NEXT: ja LBB0_50 ; CHECK-NEXT: ## %bb.23: ## %while.end1465 ; CHECK-NEXT: movl $83969, %eax ## imm = 0x14801 -; CHECK-NEXT: btl %r13d, %eax +; CHECK-NEXT: btl %r14d, %eax ; CHECK-NEXT: jae LBB0_50 ; CHECK-NEXT: ## %bb.24: ; CHECK-NEXT: xorl %ebp, %ebp @@ -287,6 +286,7 @@ ; CHECK-NEXT: ## %bb.51: ## %for.body1664.lr.ph ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ebp ## 4-byte Reload ; CHECK-NEXT: jne LBB0_54 ; CHECK-NEXT: ## %bb.52: ## %while.body1679.preheader ; CHECK-NEXT: incl %ebp Index: llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll =================================================================== --- llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll +++ llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll @@ -23,44 +23,42 @@ ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: subl $8, %esp -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-NEXT: movl (%eax), %ecx -; CHECK-NEXT: movl %ecx, (%esp) # 4-byte Spill -; CHECK-NEXT: movl 4(%eax), %ebx -; CHECK-NEXT: movl 8(%eax), %ecx -; CHECK-NEXT: movl 12(%eax), %edx -; CHECK-NEXT: movl 16(%eax), %edi -; CHECK-NEXT: movl (%eax,%esi,4), %esi -; CHECK-NEXT: shll $5, %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl (%esi), %ebp +; CHECK-NEXT: movl 4(%esi), %ebx +; CHECK-NEXT: movl 8(%esi), %ecx +; CHECK-NEXT: movl 12(%esi), %edx +; CHECK-NEXT: movl 16(%esi), %edi +; CHECK-NEXT: movl (%esi,%eax,4), %eax +; CHECK-NEXT: shll $5, %eax ; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: je .LBB0_2 ; CHECK-NEXT: # %bb.1: # %if.then -; CHECK-NEXT: movl %esi, 
24(%eax) -; CHECK-NEXT: movl %esi, %ebp -; CHECK-NEXT: movl (%esp), %eax # 4-byte Reload +; CHECK-NEXT: movl %eax, 24(%esi) ; CHECK-NEXT: #APP ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: leal 28(%eax), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: leal 28(%ecx), %edx ; CHECK-NEXT: jmp .LBB0_3 ; CHECK-NEXT: .LBB0_2: # %if.else -; CHECK-NEXT: movl 20(%eax), %ebp +; CHECK-NEXT: movl %ebp, (%esp) # 4-byte Spill +; CHECK-NEXT: movl 20(%esi), %ebp ; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl %esi, 32(%eax) -; CHECK-NEXT: movl %esi, %ebp +; CHECK-NEXT: movl %eax, 32(%esi) +; CHECK-NEXT: movl %eax, %ebp ; CHECK-NEXT: movl (%esp), %eax # 4-byte Reload ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; CHECK-NEXT: #APP ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: leal 36(%eax), %ecx -; CHECK-NEXT: .LBB0_3: # %if.end -; CHECK-NEXT: movl %ebp, (%ecx) -; CHECK-NEXT: addl (%eax), %ebp ; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: leal 36(%ecx), %edx +; CHECK-NEXT: .LBB0_3: # %if.end +; CHECK-NEXT: movl %eax, (%edx) +; CHECK-NEXT: addl (%ecx), %eax ; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi Index: llvm/test/CodeGen/X86/sjlj-eh.ll =================================================================== --- llvm/test/CodeGen/X86/sjlj-eh.ll +++ llvm/test/CodeGen/X86/sjlj-eh.ll @@ -32,9 +32,9 @@ ; CHECK-NEXT: calll __Z20function_that_throwsv ; CHECK-NEXT: Ltmp1: ; CHECK-NEXT: # %bb.1: # %try.cont +; CHECK-NEXT: LBB0_2: # %try.cont ; CHECK-NEXT: leal -64(%ebp), %eax ; CHECK-NEXT: pushl %eax -; CHECK-NEXT: LBB0_2: # %try.cont ; CHECK-NEXT: calll __Unwind_SjLj_Unregister ; CHECK-NEXT: addl $56, %esp ; CHECK-NEXT: popl %esi @@ -43,7 +43,6 @@ ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl ; CHECK-NEXT: LBB0_3: -; CHECK-NEXT: leal -64(%ebp), %esi 
; CHECK-NEXT: movl -60(%ebp), %eax ; CHECK-NEXT: cmpl $1, %eax ; CHECK-NEXT: jb LBB0_4 @@ -61,7 +60,6 @@ ; CHECK-NEXT: addl $4, %esp ; CHECK-NEXT: movl $-1, -60(%ebp) ; CHECK-NEXT: calll ___cxa_end_catch -; CHECK-NEXT: pushl %esi ; CHECK-NEXT: jmp LBB0_2 ; CHECK-NEXT: Lfunc_end0: ; Index: llvm/test/CodeGen/X86/speculative-load-hardening.ll =================================================================== --- llvm/test/CodeGen/X86/speculative-load-hardening.ll +++ llvm/test/CodeGen/X86/speculative-load-hardening.ll @@ -32,86 +32,85 @@ define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %ptr2, i32** %ptr3) speculative_load_hardening { ; X64-LABEL: test_basic_conditions: ; X64: # %bb.0: # %entry -; X64-NEXT: pushq %r15 -; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: pushq %r14 -; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: pushq %rbx +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: pushq %rax ; X64-NEXT: .cfi_def_cfa_offset 32 -; X64-NEXT: .cfi_offset %rbx, -32 -; X64-NEXT: .cfi_offset %r14, -24 -; X64-NEXT: .cfi_offset %r15, -16 +; X64-NEXT: .cfi_offset %rbx, -24 +; X64-NEXT: .cfi_offset %r14, -16 ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %rbx +; X64-NEXT: movq $-1, %r10 ; X64-NEXT: sarq $63, %rax ; X64-NEXT: testl %edi, %edi ; X64-NEXT: jne .LBB1_1 ; X64-NEXT: # %bb.2: # %then1 -; X64-NEXT: cmovneq %rbx, %rax +; X64-NEXT: cmovneq %r10, %rax ; X64-NEXT: testl %esi, %esi ; X64-NEXT: je .LBB1_4 ; X64-NEXT: .LBB1_1: -; X64-NEXT: cmoveq %rbx, %rax +; X64-NEXT: cmoveq %r10, %rax ; X64-NEXT: .LBB1_8: # %exit ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp -; X64-NEXT: popq %rbx +; X64-NEXT: addq $8, %rsp ; X64-NEXT: .cfi_def_cfa_offset 24 -; X64-NEXT: popq %r14 +; X64-NEXT: popq %rbx ; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: popq %r15 +; X64-NEXT: popq %r14 ; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq ; X64-NEXT: .LBB1_4: # %then2 ; X64-NEXT: .cfi_def_cfa_offset 32 -; X64-NEXT: movq 
%r8, %r14 -; X64-NEXT: cmovneq %rbx, %rax +; X64-NEXT: cmovneq %r10, %rax ; X64-NEXT: testl %edx, %edx ; X64-NEXT: je .LBB1_6 ; X64-NEXT: # %bb.5: # %else3 -; X64-NEXT: cmoveq %rbx, %rax -; X64-NEXT: movslq (%r9), %rcx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: leaq (%r14,%rcx,4), %r15 -; X64-NEXT: movl %ecx, (%r14,%rcx,4) +; X64-NEXT: cmoveq %r10, %rax +; X64-NEXT: movslq (%r9), %rdx +; X64-NEXT: orq %rax, %rdx +; X64-NEXT: leaq (%r8,%rdx,4), %rcx +; X64-NEXT: movl %edx, (%r8,%rdx,4) ; X64-NEXT: jmp .LBB1_7 ; X64-NEXT: .LBB1_6: # %then3 -; X64-NEXT: cmovneq %rbx, %rax +; X64-NEXT: cmovneq %r10, %rax ; X64-NEXT: movl (%rcx), %ecx -; X64-NEXT: addl (%r14), %ecx +; X64-NEXT: addl (%r8), %ecx ; X64-NEXT: movslq %ecx, %rdi ; X64-NEXT: orq %rax, %rdi -; X64-NEXT: movl (%r14,%rdi,4), %esi +; X64-NEXT: movl (%r8,%rdi,4), %esi ; X64-NEXT: orl %eax, %esi -; X64-NEXT: movq (%r9), %r15 -; X64-NEXT: orq %rax, %r15 -; X64-NEXT: addl (%r15), %esi +; X64-NEXT: movq (%r9), %rbx +; X64-NEXT: orq %rax, %rbx +; X64-NEXT: addl (%rbx), %esi ; X64-NEXT: shlq $47, %rax ; X64-NEXT: # kill: def $edi killed $edi killed $rdi ; X64-NEXT: orq %rax, %rsp +; X64-NEXT: movq %r8, (%rsp) # 8-byte Spill +; X64-NEXT: movq $-1, %r14 ; X64-NEXT: callq leak ; X64-NEXT: .Lslh_ret_addr0: +; X64-NEXT: movq %rbx, %rcx +; X64-NEXT: movq (%rsp), %r8 # 8-byte Reload ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; X64-NEXT: sarq $63, %rax -; X64-NEXT: cmpq $.Lslh_ret_addr0, %rcx -; X64-NEXT: cmovneq %rbx, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr0, %rdx +; X64-NEXT: cmovneq %r14, %rax ; X64-NEXT: .LBB1_7: # %merge -; X64-NEXT: movslq (%r15), %rcx +; X64-NEXT: movslq (%rcx), %rcx ; X64-NEXT: orq %rax, %rcx -; X64-NEXT: movl $0, (%r14,%rcx,4) +; X64-NEXT: movl $0, (%r8,%rcx,4) ; X64-NEXT: jmp .LBB1_8 ; ; X64-LFENCE-LABEL: test_basic_conditions: ; X64-LFENCE: # %bb.0: # %entry -; X64-LFENCE-NEXT: pushq %r14 -; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16 ; 
X64-LFENCE-NEXT: pushq %rbx -; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24 -; X64-LFENCE-NEXT: pushq %rax +; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16 +; X64-LFENCE-NEXT: subq $16, %rsp ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 32 -; X64-LFENCE-NEXT: .cfi_offset %rbx, -24 -; X64-LFENCE-NEXT: .cfi_offset %r14, -16 +; X64-LFENCE-NEXT: .cfi_offset %rbx, -16 ; X64-LFENCE-NEXT: testl %edi, %edi ; X64-LFENCE-NEXT: jne .LBB1_6 ; X64-LFENCE-NEXT: # %bb.1: # %then1 @@ -119,36 +118,36 @@ ; X64-LFENCE-NEXT: testl %esi, %esi ; X64-LFENCE-NEXT: jne .LBB1_6 ; X64-LFENCE-NEXT: # %bb.2: # %then2 -; X64-LFENCE-NEXT: movq %r8, %rbx ; X64-LFENCE-NEXT: lfence ; X64-LFENCE-NEXT: testl %edx, %edx ; X64-LFENCE-NEXT: je .LBB1_3 ; X64-LFENCE-NEXT: # %bb.4: # %else3 ; X64-LFENCE-NEXT: lfence -; X64-LFENCE-NEXT: movslq (%r9), %rax -; X64-LFENCE-NEXT: leaq (%rbx,%rax,4), %r14 -; X64-LFENCE-NEXT: movl %eax, (%rbx,%rax,4) +; X64-LFENCE-NEXT: movslq (%r9), %rcx +; X64-LFENCE-NEXT: leaq (%r8,%rcx,4), %rax +; X64-LFENCE-NEXT: movl %ecx, (%r8,%rcx,4) ; X64-LFENCE-NEXT: jmp .LBB1_5 ; X64-LFENCE-NEXT: .LBB1_3: # %then3 ; X64-LFENCE-NEXT: lfence ; X64-LFENCE-NEXT: movl (%rcx), %eax -; X64-LFENCE-NEXT: addl (%rbx), %eax +; X64-LFENCE-NEXT: addl (%r8), %eax ; X64-LFENCE-NEXT: movslq %eax, %rdi -; X64-LFENCE-NEXT: movl (%rbx,%rdi,4), %esi -; X64-LFENCE-NEXT: movq (%r9), %r14 -; X64-LFENCE-NEXT: addl (%r14), %esi +; X64-LFENCE-NEXT: movl (%r8,%rdi,4), %esi +; X64-LFENCE-NEXT: movq (%r9), %rbx +; X64-LFENCE-NEXT: addl (%rbx), %esi ; X64-LFENCE-NEXT: # kill: def $edi killed $edi killed $rdi +; X64-LFENCE-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-LFENCE-NEXT: callq leak +; X64-LFENCE-NEXT: movq %rbx, %rax +; X64-LFENCE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload ; X64-LFENCE-NEXT: .LBB1_5: # %merge -; X64-LFENCE-NEXT: movslq (%r14), %rax -; X64-LFENCE-NEXT: movl $0, (%rbx,%rax,4) +; X64-LFENCE-NEXT: movslq (%rax), %rax +; X64-LFENCE-NEXT: movl $0, (%r8,%rax,4) ; X64-LFENCE-NEXT: 
.LBB1_6: # %exit ; X64-LFENCE-NEXT: lfence -; X64-LFENCE-NEXT: addq $8, %rsp -; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24 -; X64-LFENCE-NEXT: popq %rbx +; X64-LFENCE-NEXT: addq $16, %rsp ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16 -; X64-LFENCE-NEXT: popq %r14 +; X64-LFENCE-NEXT: popq %rbx ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 8 ; X64-LFENCE-NEXT: retq entry: @@ -505,28 +504,22 @@ ; X64: # %bb.0: # %entry ; X64-NEXT: pushq %rbp ; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: pushq %r15 -; X64-NEXT: .cfi_def_cfa_offset 24 -; X64-NEXT: pushq %r14 -; X64-NEXT: .cfi_def_cfa_offset 32 ; X64-NEXT: pushq %rbx -; X64-NEXT: .cfi_def_cfa_offset 40 -; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: subq $24, %rsp ; X64-NEXT: .cfi_def_cfa_offset 48 -; X64-NEXT: .cfi_offset %rbx, -40 -; X64-NEXT: .cfi_offset %r14, -32 -; X64-NEXT: .cfi_offset %r15, -24 +; X64-NEXT: .cfi_offset %rbx, -24 ; X64-NEXT: .cfi_offset %rbp, -16 ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %r15 +; X64-NEXT: movq $-1, %rbx ; X64-NEXT: sarq $63, %rax ; X64-NEXT: cmpl $41, %edi ; X64-NEXT: jg .LBB4_1 ; X64-NEXT: # %bb.2: # %thrower -; X64-NEXT: movq %rdx, %r14 -; X64-NEXT: movq %rsi, %rbx -; X64-NEXT: cmovgq %r15, %rax +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: cmovgq %rbx, %rax ; X64-NEXT: movslq %edi, %rcx +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movl (%rsi,%rcx,4), %ebp ; X64-NEXT: orl %eax, %ebp ; X64-NEXT: movl $4, %edi @@ -538,7 +531,7 @@ ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; X64-NEXT: sarq $63, %rcx ; X64-NEXT: cmpq $.Lslh_ret_addr4, %rdx -; X64-NEXT: cmovneq %r15, %rcx +; X64-NEXT: cmovneq %rbx, %rcx ; X64-NEXT: movl %ebp, (%rax) ; X64-NEXT: .Ltmp0: ; X64-NEXT: shlq $47, %rcx @@ -552,21 +545,17 @@ ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax ; X64-NEXT: cmpq $.Lslh_ret_addr5, %rcx -; X64-NEXT: cmovneq %r15, %rax +; X64-NEXT: cmovneq %rbx, %rax ; X64-NEXT: .Ltmp1: ; X64-NEXT: jmp 
.LBB4_3 ; X64-NEXT: .LBB4_1: -; X64-NEXT: cmovleq %r15, %rax +; X64-NEXT: cmovleq %rbx, %rax ; X64-NEXT: .LBB4_3: # %exit ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp -; X64-NEXT: addq $8, %rsp -; X64-NEXT: .cfi_def_cfa_offset 40 -; X64-NEXT: popq %rbx -; X64-NEXT: .cfi_def_cfa_offset 32 -; X64-NEXT: popq %r14 +; X64-NEXT: addq $24, %rsp ; X64-NEXT: .cfi_def_cfa_offset 24 -; X64-NEXT: popq %r15 +; X64-NEXT: popq %rbx ; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: popq %rbp ; X64-NEXT: .cfi_def_cfa_offset 8 @@ -576,11 +565,13 @@ ; X64-NEXT: .Ltmp2: ; X64-NEXT: movq %rsp, %rcx ; X64-NEXT: sarq $63, %rcx -; X64-NEXT: movl (%rax), %eax -; X64-NEXT: addl (%rbx), %eax -; X64-NEXT: cltq -; X64-NEXT: orq %rcx, %rax -; X64-NEXT: movl (%r14,%rax,4), %edi +; X64-NEXT: movl (%rax), %edx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NEXT: addl (%rax), %edx +; X64-NEXT: movslq %edx, %rdx +; X64-NEXT: orq %rcx, %rdx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NEXT: movl (%rax,%rdx,4), %edi ; X64-NEXT: orl %ecx, %edi ; X64-NEXT: shlq $47, %rcx ; X64-NEXT: orq %rcx, %rsp @@ -590,7 +581,7 @@ ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax ; X64-NEXT: cmpq $.Lslh_ret_addr6, %rcx -; X64-NEXT: cmovneq %r15, %rax +; X64-NEXT: cmovneq %rbx, %rax ; ; X64-LFENCE-LABEL: test_basic_eh: ; X64-LFENCE: # %bb.0: # %entry Index: llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll =================================================================== --- llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll +++ llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll @@ -90,24 +90,23 @@ ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: pushq %r15 ; CHECK-NEXT: pushq %r14 -; CHECK-NEXT: pushq %r13 ; CHECK-NEXT: pushq %r12 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: pushq %rax +; CHECK-NEXT: subq $16, %rsp ; CHECK-NEXT: movl $1, %ebx ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB1_26 ; CHECK-NEXT: # 
%bb.1: # %if.end19 -; CHECK-NEXT: movl %esi, %r13d -; CHECK-NEXT: movq %rdi, %r12 +; CHECK-NEXT: movl %esi, %r15d +; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: movl (%rax), %ebp -; CHECK-NEXT: leal (,%rbp,4), %r14d -; CHECK-NEXT: movl %r14d, %r15d +; CHECK-NEXT: leal (,%rbp,4), %r12d +; CHECK-NEXT: movl %r12d, %r14d ; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: movq %r15, %rdi +; CHECK-NEXT: movq %r14, %rdi ; CHECK-NEXT: callq cli_calloc -; CHECK-NEXT: testl %r13d, %r13d +; CHECK-NEXT: testl %r15d, %r15d ; CHECK-NEXT: je .LBB1_25 ; CHECK-NEXT: # %bb.2: # %if.end19 ; CHECK-NEXT: testl %ebp, %ebp @@ -118,13 +117,13 @@ ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB1_25 ; CHECK-NEXT: # %bb.4: # %if.end19 -; CHECK-NEXT: cmpq %r12, %rbx +; CHECK-NEXT: cmpq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload ; CHECK-NEXT: jb .LBB1_25 ; CHECK-NEXT: # %bb.5: # %if.end50 ; CHECK-NEXT: movq %rbx, %rdi -; CHECK-NEXT: movq %r15, %rdx +; CHECK-NEXT: movq %r14, %rdx ; CHECK-NEXT: callq memcpy -; CHECK-NEXT: cmpl $4, %r14d +; CHECK-NEXT: cmpl $4, %r12d ; CHECK-NEXT: jb .LBB1_28 ; CHECK-NEXT: # %bb.6: # %shared_preheader ; CHECK-NEXT: movb $32, %dl @@ -204,10 +203,9 @@ ; CHECK-NEXT: callq cli_dbgmsg ; CHECK-NEXT: .LBB1_26: # %cleanup ; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: addq $16, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 -; CHECK-NEXT: popq %r13 ; CHECK-NEXT: popq %r14 ; CHECK-NEXT: popq %r15 ; CHECK-NEXT: popq %rbp Index: llvm/test/CodeGen/X86/tail-opts.ll =================================================================== --- llvm/test/CodeGen/X86/tail-opts.ll +++ llvm/test/CodeGen/X86/tail-opts.ll @@ -239,78 +239,80 @@ define fastcc void @c_expand_expr_stmt(%union.tree_node* %expr) nounwind { ; CHECK-LABEL: c_expand_expr_stmt: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: pushq %rax ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB3_17 ; 
CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: movb 0, %bl -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: movb 0, %al +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: jne .LBB3_16 ; CHECK-NEXT: # %bb.2: # %bb.i -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: je .LBB3_16 ; CHECK-NEXT: # %bb.3: # %lvalue_p.exit -; CHECK-NEXT: movq 0, %rax -; CHECK-NEXT: movzbl (%rax), %ecx -; CHECK-NEXT: testl %ecx, %ecx +; CHECK-NEXT: movq 0, %rcx +; CHECK-NEXT: movzbl (%rcx), %edx +; CHECK-NEXT: testl %edx, %edx ; CHECK-NEXT: je .LBB3_12 ; CHECK-NEXT: # %bb.4: # %lvalue_p.exit -; CHECK-NEXT: cmpl $2, %ecx +; CHECK-NEXT: cmpl $2, %edx ; CHECK-NEXT: jne .LBB3_5 ; CHECK-NEXT: # %bb.6: # %bb.i1 -; CHECK-NEXT: movq 32(%rax), %rax -; CHECK-NEXT: movzbl 16(%rax), %ecx -; CHECK-NEXT: testl %ecx, %ecx +; CHECK-NEXT: movq 32(%rcx), %rcx +; CHECK-NEXT: movzbl 16(%rcx), %edx +; CHECK-NEXT: testl %edx, %edx ; CHECK-NEXT: je .LBB3_10 ; CHECK-NEXT: # %bb.7: # %bb.i1 -; CHECK-NEXT: cmpl $2, %ecx +; CHECK-NEXT: cmpl $2, %edx ; CHECK-NEXT: jne .LBB3_8 ; CHECK-NEXT: # %bb.9: # %bb.i.i ; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: callq lvalue_p ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: setne %al -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload +; CHECK-NEXT: setne %cl +; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: je .LBB3_15 ; CHECK-NEXT: jmp .LBB3_17 ; CHECK-NEXT: .LBB3_16: # %bb1 -; CHECK-NEXT: cmpb $23, %bl +; CHECK-NEXT: cmpb $23, %al ; CHECK-NEXT: .LBB3_17: # %bb3 ; CHECK-NEXT: .LBB3_12: # %bb2.i3 -; CHECK-NEXT: movq 8(%rax), %rax -; CHECK-NEXT: movb 16(%rax), %cl -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpb $23, %cl +; CHECK-NEXT: movq 8(%rcx), %rcx +; CHECK-NEXT: movb 16(%rcx), %dl +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: cmpb $23, 
%dl ; CHECK-NEXT: je .LBB3_14 ; CHECK-NEXT: # %bb.13: # %bb2.i3 -; CHECK-NEXT: cmpb $16, %cl +; CHECK-NEXT: cmpb $16, %dl ; CHECK-NEXT: je .LBB3_14 ; CHECK-NEXT: jmp .LBB3_17 ; CHECK-NEXT: .LBB3_5: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: je .LBB3_15 ; CHECK-NEXT: jmp .LBB3_17 ; CHECK-NEXT: .LBB3_10: # %bb2.i.i2 -; CHECK-NEXT: movq 8(%rax), %rax -; CHECK-NEXT: movb 16(%rax), %cl -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpb $16, %cl +; CHECK-NEXT: movq 8(%rcx), %rcx +; CHECK-NEXT: movb 16(%rcx), %dl +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: cmpb $16, %dl ; CHECK-NEXT: je .LBB3_14 ; CHECK-NEXT: # %bb.11: # %bb2.i.i2 -; CHECK-NEXT: cmpb $23, %cl +; CHECK-NEXT: cmpb $23, %dl ; CHECK-NEXT: je .LBB3_14 ; CHECK-NEXT: jmp .LBB3_17 ; CHECK-NEXT: .LBB3_8: -; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: .LBB3_14: # %lvalue_p.exit4 -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: jne .LBB3_17 ; CHECK-NEXT: .LBB3_15: # %lvalue_p.exit4 -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: testb %al, %al entry: %tmp4 = load i8, i8* null, align 8 ; [#uses=3] switch i8 %tmp4, label %bb3 [ Index: llvm/test/CodeGen/X86/tbm_patterns.ll =================================================================== --- llvm/test/CodeGen/X86/tbm_patterns.ll +++ llvm/test/CodeGen/X86/tbm_patterns.ll @@ -875,14 +875,16 @@ define i32 @blcic32_branch(i32 %x) nounwind { ; CHECK-LABEL: blcic32_branch: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: blcicl %edi, %ebx -; CHECK-NEXT: jne .LBB69_2 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: blcicl %edi, %eax +; CHECK-NEXT: je .LBB69_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB69_1: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: callq bar -; CHECK-NEXT: .LBB69_2: -; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: popq %rbx +; 
CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: retq %tmp = xor i32 %x, -1 %tmp2 = add i32 %x, 1 @@ -898,14 +900,16 @@ define i64 @blcic64_branch(i64 %x) nounwind { ; CHECK-LABEL: blcic64_branch: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: blcicq %rdi, %rbx -; CHECK-NEXT: jne .LBB70_2 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: blcicq %rdi, %rax +; CHECK-NEXT: je .LBB70_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB70_1: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movq %rax, (%rsp) # 8-byte Spill ; CHECK-NEXT: callq bar -; CHECK-NEXT: .LBB70_2: -; CHECK-NEXT: movq %rbx, %rax -; CHECK-NEXT: popq %rbx +; CHECK-NEXT: movq (%rsp), %rax # 8-byte Reload +; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: retq %tmp = xor i64 %x, -1 %tmp2 = add i64 %x, 1 @@ -921,14 +925,16 @@ define i32 @tzmsk32_branch(i32 %x) nounwind { ; CHECK-LABEL: tzmsk32_branch: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: tzmskl %edi, %ebx -; CHECK-NEXT: jne .LBB71_2 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: tzmskl %edi, %eax +; CHECK-NEXT: je .LBB71_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB71_1: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: callq bar -; CHECK-NEXT: .LBB71_2: -; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: popq %rbx +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: retq %tmp = xor i32 %x, -1 %tmp2 = add i32 %x, -1 @@ -944,14 +950,16 @@ define i64 @tzmsk64_branch(i64 %x) nounwind { ; CHECK-LABEL: tzmsk64_branch: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: tzmskq %rdi, %rbx -; CHECK-NEXT: jne .LBB72_2 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: tzmskq %rdi, %rax +; CHECK-NEXT: je .LBB72_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB72_1: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movq %rax, (%rsp) # 8-byte Spill ; CHECK-NEXT: callq 
bar -; CHECK-NEXT: .LBB72_2: -; CHECK-NEXT: movq %rbx, %rax -; CHECK-NEXT: popq %rbx +; CHECK-NEXT: movq (%rsp), %rax # 8-byte Reload +; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: retq %tmp = xor i64 %x, -1 %tmp2 = add i64 %x, -1 Index: llvm/test/CodeGen/X86/x86-shrink-wrapping.ll =================================================================== --- llvm/test/CodeGen/X86/x86-shrink-wrapping.ll +++ llvm/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -1374,9 +1374,6 @@ ; ENABLE-NEXT: .cfi_offset %rbp, -16 ; ENABLE-NEXT: movq %rsp, %rbp ; ENABLE-NEXT: .cfi_def_cfa_register %rbp -; ENABLE-NEXT: pushq %rbx -; ENABLE-NEXT: pushq %rax -; ENABLE-NEXT: .cfi_offset %rbx, -24 ; ENABLE-NEXT: movq _irreducibleCFGf@{{.*}}(%rip), %rax ; ENABLE-NEXT: cmpb $0, (%rax) ; ENABLE-NEXT: je LBB16_2 @@ -1387,26 +1384,22 @@ ; ENABLE-NEXT: LBB16_2: ## %split ; ENABLE-NEXT: movq _irreducibleCFGb@{{.*}}(%rip), %rax ; ENABLE-NEXT: cmpl $0, (%rax) -; ENABLE-NEXT: je LBB16_3 -; ENABLE-NEXT: ## %bb.4: ## %for.body4.i +; ENABLE-NEXT: je LBB16_4 +; ENABLE-NEXT: ## %bb.3: ## %for.body4.i ; ENABLE-NEXT: movq _irreducibleCFGa@{{.*}}(%rip), %rax ; ENABLE-NEXT: movl (%rax), %edi -; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: callq _something -; ENABLE-NEXT: jmp LBB16_5 -; ENABLE-NEXT: LBB16_3: -; ENABLE-NEXT: xorl %ebx, %ebx +; ENABLE-NEXT: LBB16_4: ## %for.inc +; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: .p2align 4, 0x90 ; ENABLE-NEXT: LBB16_5: ## %for.inc ; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 -; ENABLE-NEXT: incl %ebx -; ENABLE-NEXT: cmpl $7, %ebx +; ENABLE-NEXT: incl %eax +; ENABLE-NEXT: cmpl $7, %eax ; ENABLE-NEXT: jl LBB16_5 ; ENABLE-NEXT: ## %bb.6: ## %fn1.exit ; ENABLE-NEXT: xorl %eax, %eax -; ENABLE-NEXT: addq $8, %rsp -; ENABLE-NEXT: popq %rbx ; ENABLE-NEXT: popq %rbp ; ENABLE-NEXT: retq ; @@ -1417,9 +1410,6 @@ ; DISABLE-NEXT: .cfi_offset %rbp, -16 ; DISABLE-NEXT: movq %rsp, %rbp ; DISABLE-NEXT: .cfi_def_cfa_register %rbp -; DISABLE-NEXT: pushq %rbx -; 
DISABLE-NEXT: pushq %rax -; DISABLE-NEXT: .cfi_offset %rbx, -24 ; DISABLE-NEXT: movq _irreducibleCFGf@{{.*}}(%rip), %rax ; DISABLE-NEXT: cmpb $0, (%rax) ; DISABLE-NEXT: je LBB16_2 @@ -1430,26 +1420,22 @@ ; DISABLE-NEXT: LBB16_2: ## %split ; DISABLE-NEXT: movq _irreducibleCFGb@{{.*}}(%rip), %rax ; DISABLE-NEXT: cmpl $0, (%rax) -; DISABLE-NEXT: je LBB16_3 -; DISABLE-NEXT: ## %bb.4: ## %for.body4.i +; DISABLE-NEXT: je LBB16_4 +; DISABLE-NEXT: ## %bb.3: ## %for.body4.i ; DISABLE-NEXT: movq _irreducibleCFGa@{{.*}}(%rip), %rax ; DISABLE-NEXT: movl (%rax), %edi -; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: callq _something -; DISABLE-NEXT: jmp LBB16_5 -; DISABLE-NEXT: LBB16_3: -; DISABLE-NEXT: xorl %ebx, %ebx +; DISABLE-NEXT: LBB16_4: ## %for.inc +; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: .p2align 4, 0x90 ; DISABLE-NEXT: LBB16_5: ## %for.inc ; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 -; DISABLE-NEXT: incl %ebx -; DISABLE-NEXT: cmpl $7, %ebx +; DISABLE-NEXT: incl %eax +; DISABLE-NEXT: cmpl $7, %eax ; DISABLE-NEXT: jl LBB16_5 ; DISABLE-NEXT: ## %bb.6: ## %fn1.exit ; DISABLE-NEXT: xorl %eax, %eax -; DISABLE-NEXT: addq $8, %rsp -; DISABLE-NEXT: popq %rbx ; DISABLE-NEXT: popq %rbp ; DISABLE-NEXT: retq entry: Index: llvm/test/DebugInfo/X86/live-debug-values.ll =================================================================== --- llvm/test/DebugInfo/X86/live-debug-values.ll +++ llvm/test/DebugInfo/X86/live-debug-values.ll @@ -29,8 +29,8 @@ ; DBG_VALUE for variable "n" is extended into %bb.5 from its predecessors %bb.3 ; and %bb.4. +; CHECK: #DEBUG_VALUE: main:n <- $ebx ; CHECK: .LBB0_5: -; CHECK-NEXT: #DEBUG_VALUE: main:n <- $ebx ; Other register values have been clobbered. ; CHECK-NOT: #DEBUG_VALUE: ; CHECK: movl %e{{..}}, m(%rip)