Index: llvm/lib/CodeGen/RegAllocGreedy.cpp =================================================================== --- llvm/lib/CodeGen/RegAllocGreedy.cpp +++ llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -2842,28 +2842,8 @@ } void RAGreedy::initializeCSRCost() { - // We use the larger one out of the command-line option and the value report - // by TRI. - CSRCost = BlockFrequency( - std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost())); - if (!CSRCost.getFrequency()) - return; - - // Raw cost is relative to Entry == 2^14; scale it appropriately. - uint64_t ActualEntry = MBFI->getEntryFreq(); - if (!ActualEntry) { - CSRCost = 0; - return; - } - uint64_t FixedEntry = 1 << 14; - if (ActualEntry < FixedEntry) - CSRCost *= BranchProbability(ActualEntry, FixedEntry); - else if (ActualEntry <= UINT32_MAX) - // Invert the fraction and divide. - CSRCost /= BranchProbability(FixedEntry, ActualEntry); - else - // Can't use BranchProbability in general, since it takes 32-bit numbers. - CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry); + // We need to scale the cost relative to the entry frequency. + CSRCost = BlockFrequency(MBFI->getEntryFreq() * TRI->getCSRFirstUseCost()); } /// Collect the hint info for \p Reg. Index: llvm/lib/Target/AArch64/AArch64RegisterInfo.h =================================================================== --- llvm/lib/Target/AArch64/AArch64RegisterInfo.h +++ llvm/lib/Target/AArch64/AArch64RegisterInfo.h @@ -50,10 +50,9 @@ CallingConv::ID) const override; unsigned getCSRFirstUseCost() const override { - // The cost will be compared against BlockFrequency where entry has the - // value of 1 << 14. A value of 5 will choose to spill or split really - // cold path instead of using a callee-saved register. - return 5; + // If a CSR is used, the cost is a load/store pair in prologue/epilogue. + // So the cost is that of one spill, like the cost calculated in SpillPlacer. 
+ return 1; } const TargetRegisterClass * Index: llvm/lib/Target/PowerPC/PPCRegisterInfo.h =================================================================== --- llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -100,6 +100,12 @@ return true; } + unsigned getCSRFirstUseCost() const override { + // If a CSR is used, the cost is a load/store pair in prologue/epilogue. + // So the cost is that of one spill, like the cost calculated in SpillPlacer. + return 1; + } + void lowerDynamicAlloc(MachineBasicBlock::iterator II) const; void lowerDynamicAreaOffset(MachineBasicBlock::iterator II) const; void lowerCRSpilling(MachineBasicBlock::iterator II, Index: llvm/lib/Target/X86/X86RegisterInfo.h =================================================================== --- llvm/lib/Target/X86/X86RegisterInfo.h +++ llvm/lib/Target/X86/X86RegisterInfo.h @@ -109,6 +109,12 @@ CallingConv::ID) const override; const uint32_t *getNoPreservedMask() const override; + unsigned getCSRFirstUseCost() const override { + // If a CSR is used, the cost is a load/store pair in prologue/epilogue. + // So the cost is that of one spill, like the cost calculated in SpillPlacer. + return 1; + } + // Calls involved in thread-local variable lookup save more registers than // normal calls, so they need a different mask to represent this. const uint32_t *getDarwinTLSCallPreservedMask() const; Index: llvm/test/CodeGen/AArch64/cgp-usubo.ll =================================================================== --- llvm/test/CodeGen/AArch64/cgp-usubo.ll +++ llvm/test/CodeGen/AArch64/cgp-usubo.ll @@ -151,30 +151,33 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, i64* %p, i1 %cond) nounwind { ; CHECK-LABEL: usubo_ult_cmp_dominates_i64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-48]! 
// 8-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #48 // =48 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: mov w20, w3 -; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: tbz w3, #0, .LBB8_3 ; CHECK-NEXT: // %bb.1: // %t ; CHECK-NEXT: cmp x0, x1 -; CHECK-NEXT: mov x22, x0 -; CHECK-NEXT: cset w0, lo -; CHECK-NEXT: mov x19, x2 -; CHECK-NEXT: mov x21, x1 +; CHECK-NEXT: cset w8, lo +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: str x2, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: str w3, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: mov x20, x1 ; CHECK-NEXT: bl call -; CHECK-NEXT: subs x8, x22, x21 +; CHECK-NEXT: ldr w3, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: subs x9, x19, x20 ; CHECK-NEXT: b.hs .LBB8_3 ; CHECK-NEXT: // %bb.2: // %end ; CHECK-NEXT: cset w0, lo -; CHECK-NEXT: str x8, [x19] +; CHECK-NEXT: str x9, [x8] ; CHECK-NEXT: b .LBB8_4 ; CHECK-NEXT: .LBB8_3: // %f -; CHECK-NEXT: and w0, w20, #0x1 +; CHECK-NEXT: and w0, w3, #0x1 ; CHECK-NEXT: .LBB8_4: // %f ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 // =48 ; CHECK-NEXT: ret entry: br i1 %cond, label %t, label %f Index: llvm/test/CodeGen/AArch64/csr-split.ll =================================================================== --- llvm/test/CodeGen/AArch64/csr-split.ll +++ llvm/test/CodeGen/AArch64/csr-split.ll @@ -86,7 +86,6 @@ ; CHECK-NEXT: // %bb.1: // %if.end ; CHECK-NEXT: adrp x8, a ; CHECK-NEXT: ldrsw x8, [x8, :lo12:a] -; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: cmp x8, x0 ; CHECK-NEXT: b.eq .LBB1_3 ; CHECK-NEXT: .LBB1_2: // %return @@ -94,6 +93,7 @@ ; CHECK-NEXT: ldp x30, x19, [sp], #16 
// 16-byte Folded Reload ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_3: // %if.then2 +; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: bl callVoid ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload @@ -114,7 +114,6 @@ ; CHECK-APPLE-NEXT: adrp x8, _a@PAGE ; CHECK-APPLE-NEXT: Lloh3: ; CHECK-APPLE-NEXT: ldrsw x8, [x8, _a@PAGEOFF] -; CHECK-APPLE-NEXT: mov x19, x0 ; CHECK-APPLE-NEXT: cmp x8, x0 ; CHECK-APPLE-NEXT: b.eq LBB1_3 ; CHECK-APPLE-NEXT: LBB1_2: ; %return @@ -123,6 +122,7 @@ ; CHECK-APPLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: ret ; CHECK-APPLE-NEXT: LBB1_3: ; %if.then2 +; CHECK-APPLE-NEXT: mov x19, x0 ; CHECK-APPLE-NEXT: bl _callVoid ; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: mov x0, x19 Index: llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll =================================================================== --- llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll +++ llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll @@ -141,11 +141,12 @@ ; CHECK-NEXT: stw 3, 216(1) ; CHECK-NEXT: lfd 2, 216(1) ; CHECK-NEXT: bl __gcc_qadd@PLT +; CHECK-NEXT: mcrf 0, 2 ; CHECK-NEXT: blt 2, .LBB0_7 ; CHECK-NEXT: # %bb.6: # %bb1 ; CHECK-NEXT: fmr 2, 28 ; CHECK-NEXT: .LBB0_7: # %bb1 -; CHECK-NEXT: blt 2, .LBB0_9 +; CHECK-NEXT: blt 0, .LBB0_9 ; CHECK-NEXT: # %bb.8: # %bb1 ; CHECK-NEXT: fmr 1, 29 ; CHECK-NEXT: .LBB0_9: # %bb1 Index: llvm/test/CodeGen/PowerPC/csr-split.ll =================================================================== --- llvm/test/CodeGen/PowerPC/csr-split.ll +++ llvm/test/CodeGen/PowerPC/csr-split.ll @@ -18,20 +18,21 @@ ; CHECK-PWR9-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-PWR9-NEXT: std r0, 16(r1) ; CHECK-PWR9-NEXT: stdu r1, -48(r1) -; CHECK-PWR9-NEXT: mr r30, r3 -; CHECK-PWR9-NEXT: addis r3, r2, a@toc@ha -; CHECK-PWR9-NEXT: lwa r3, a@toc@l(r3) -; CHECK-PWR9-NEXT: cmpld r3, r30 -; CHECK-PWR9-NEXT: # implicit-def: $r3 +; CHECK-PWR9-NEXT: addis r4, r2, 
a@toc@ha +; CHECK-PWR9-NEXT: lwa r4, a@toc@l(r4) +; CHECK-PWR9-NEXT: cmpld r4, r3 +; CHECK-PWR9-NEXT: # implicit-def: $r4 ; CHECK-PWR9-NEXT: bne cr0, .LBB0_2 ; CHECK-PWR9-NEXT: # %bb.1: # %if.then +; CHECK-PWR9-NEXT: mr r30, r3 ; CHECK-PWR9-NEXT: bl callVoid ; CHECK-PWR9-NEXT: nop ; CHECK-PWR9-NEXT: mr r3, r30 ; CHECK-PWR9-NEXT: bl callNonVoid ; CHECK-PWR9-NEXT: nop +; CHECK-PWR9-NEXT: mr r4, r3 ; CHECK-PWR9-NEXT: .LBB0_2: # %if.end -; CHECK-PWR9-NEXT: extsw r3, r3 +; CHECK-PWR9-NEXT: extsw r3, r4 ; CHECK-PWR9-NEXT: addi r1, r1, 48 ; CHECK-PWR9-NEXT: ld r0, 16(r1) ; CHECK-PWR9-NEXT: mtlr r0 @@ -49,8 +50,8 @@ ; CHECK-NEXT: addis r4, r2, a@toc@ha ; CHECK-NEXT: std r30, 112(r1) # 8-byte Folded Spill ; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: lwa r4, a@toc@l(r4) -; CHECK-NEXT: cmpld r4, r3 +; CHECK-NEXT: lwa r5, a@toc@l(r4) +; CHECK-NEXT: cmpld r5, r3 ; CHECK-NEXT: # implicit-def: $r3 ; CHECK-NEXT: bne cr0, .LBB0_2 ; CHECK-NEXT: # %bb.1: # %if.then @@ -102,9 +103,9 @@ ; CHECK-PWR9-NEXT: cmpldi r30, 0 ; CHECK-PWR9-NEXT: beq cr0, .LBB1_3 ; CHECK-PWR9-NEXT: # %bb.1: # %if.end -; CHECK-PWR9-NEXT: addis r4, r2, a@toc@ha -; CHECK-PWR9-NEXT: lwa r4, a@toc@l(r4) -; CHECK-PWR9-NEXT: cmpld r4, r30 +; CHECK-PWR9-NEXT: addis r5, r2, a@toc@ha +; CHECK-PWR9-NEXT: lwa r5, a@toc@l(r5) +; CHECK-PWR9-NEXT: cmpld r5, r30 ; CHECK-PWR9-NEXT: bne cr0, .LBB1_3 ; CHECK-PWR9-NEXT: # %bb.2: # %if.then2 ; CHECK-PWR9-NEXT: bl callVoid @@ -134,9 +135,9 @@ ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: beq cr0, .LBB1_3 ; CHECK-NEXT: # %bb.1: # %if.end -; CHECK-NEXT: addis r4, r2, a@toc@ha -; CHECK-NEXT: lwa r4, a@toc@l(r4) -; CHECK-NEXT: cmpld r4, r30 +; CHECK-NEXT: addis r5, r2, a@toc@ha +; CHECK-NEXT: lwa r5, a@toc@l(r5) +; CHECK-NEXT: cmpld r5, r30 ; CHECK-NEXT: bne cr0, .LBB1_3 ; CHECK-NEXT: # %bb.2: # %if.then2 ; CHECK-NEXT: bl callVoid Index: llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll =================================================================== --- 
llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll +++ llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll @@ -1,4 +1,5 @@ -; RUN: llc -O2 -o - %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -O2 -o - %s | FileCheck %s target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-grtev4-linux-gnu" @@ -10,21 +11,47 @@ ; body2 ; exit -;CHECK-LABEL: tail_dup_break_cfg: -;CHECK: mr [[TAGREG:[0-9]+]], 3 -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 -;CHECK-NEXT: bc 12, 1, [[BODY1LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: # %test2 -;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 2 -;CHECK-NEXT: bne 0, [[BODY2LABEL:[._0-9A-Za-z]+]] -;CHECK: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit -;CHECK: blr -;CHECK-NEXT: [[BODY1LABEL]] -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 2 -;CHECK-NEXT: beq 0, [[EXITLABEL]] -;CHECK-NEXT: [[BODY2LABEL:[._0-9A-Za-z]+]]: -;CHECK: b [[EXITLABEL]] define void @tail_dup_break_cfg(i32 %tag) { +; CHECK-LABEL: tail_dup_break_cfg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -48(1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: andi. 4, 3, 1 +; CHECK-NEXT: bc 12, 1, .LBB0_3 +; CHECK-NEXT: # %bb.1: # %test2 +; CHECK-NEXT: andi. 3, 3, 2 +; CHECK-NEXT: bne 0, .LBB0_4 +; CHECK-NEXT: .LBB0_2: # %exit +; CHECK-NEXT: addi 1, 1, 48 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB0_3: # %body1 +; CHECK-NEXT: std 3, 40(1) # 8-byte Folded Spill +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload +; CHECK-NEXT: andi. 
3, 3, 2 +; CHECK-NEXT: beq 0, .LBB0_2 +; CHECK-NEXT: .LBB0_4: # %body2 +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: b .LBB0_2 entry: br label %test1 test1: @@ -52,18 +79,44 @@ } ; The branch weights here hint that we shouldn't tail duplicate in this case. -;CHECK-LABEL: tail_dup_dont_break_cfg: -;CHECK: mr [[TAGREG:[0-9]+]], 3 -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 -;CHECK-NEXT: bc 4, 1, [[TEST2LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: # %body1 -;CHECK: [[TEST2LABEL]]: # %test2 -;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 2 -;CHECK-NEXT: beq 0, [[EXITLABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: # %body2 -;CHECK: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit -;CHECK: blr define void @tail_dup_dont_break_cfg(i32 %tag) { +; CHECK-LABEL: tail_dup_dont_break_cfg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -48(1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: andi. 4, 3, 1 +; CHECK-NEXT: bc 4, 1, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %body1 +; CHECK-NEXT: std 3, 40(1) # 8-byte Folded Spill +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload +; CHECK-NEXT: .LBB1_2: # %test2 +; CHECK-NEXT: andi. 3, 3, 2 +; CHECK-NEXT: beq 0, .LBB1_4 +; CHECK-NEXT: # %bb.3: # %body2 +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: .LBB1_4: # %exit +; CHECK-NEXT: addi 1, 1, 48 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr entry: br label %test1 test1: @@ -98,18 +151,43 @@ ; out. 
When we consider whether to lay out succ after bb and to tail-duplicate ; it, v and ret have already been placed, so we tail-duplicate as it removes a ; branch and strictly increases fallthrough -; CHECK-LABEL: tail_dup_no_succ -; CHECK: # %entry -; CHECK: # %v -; CHECK: # %ret -; CHECK: # %bb -; CHECK: # %succ -; CHECK: # %c -; CHECK: bl c -; CHECK: andi. {{[0-9]+}}, {{[0-9]+}}, 4 -; CHECK: beq -; CHECK: b define void @tail_dup_no_succ(i32 %tag) { +; CHECK-LABEL: tail_dup_no_succ: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -48(1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: andi. 4, 3, 1 +; CHECK-NEXT: bc 12, 1, .LBB2_3 +; CHECK-NEXT: .LBB2_1: # %v +; CHECK-NEXT: bl d +; CHECK-NEXT: nop +; CHECK-NEXT: bl d +; CHECK-NEXT: nop +; CHECK-NEXT: .LBB2_2: # %ret +; CHECK-NEXT: addi 1, 1, 48 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB2_3: # %bb +; CHECK-NEXT: andi. 4, 3, 2 +; CHECK-NEXT: bne 0, .LBB2_5 +; CHECK-NEXT: # %bb.4: # %succ +; CHECK-NEXT: andi. 3, 3, 4 +; CHECK-NEXT: beq 0, .LBB2_2 +; CHECK-NEXT: b .LBB2_1 +; CHECK-NEXT: .LBB2_5: # %c +; CHECK-NEXT: std 3, 40(1) # 8-byte Folded Spill +; CHECK-NEXT: bl c +; CHECK-NEXT: nop +; CHECK-NEXT: bl c +; CHECK-NEXT: nop +; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload +; CHECK-NEXT: andi. 
3, 3, 4 +; CHECK-NEXT: beq 0, .LBB2_2 +; CHECK-NEXT: b .LBB2_1 entry: %tagbit1 = and i32 %tag, 1 %tagbit1eq0 = icmp eq i32 %tagbit1, 0 Index: llvm/test/CodeGen/PowerPC/tail-dup-layout.ll =================================================================== --- llvm/test/CodeGen/PowerPC/tail-dup-layout.ll +++ llvm/test/CodeGen/PowerPC/tail-dup-layout.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -O2 -ppc-reduce-cr-logicals -o - %s | FileCheck \ ; RUN: --check-prefix=CHECK --check-prefix=CHECK-O2 %s ; RUN: llc -O3 -ppc-reduce-cr-logicals -o - %s | FileCheck \ @@ -22,35 +23,79 @@ ; The CHECK statements check for the whole string of tests ; and then check that the correct test has been duplicated into the end of ; the optional blocks and that the optional blocks are in the correct order. -;CHECK-LABEL: straight_test: ; test1 may have been merged with entry -;CHECK: mr [[TAGREG:[0-9]+]], 3 -;CHECK: andi. {{[0-9]+}}, [[TAGREG:[0-9]+]], 1 -;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]] -;CHECK-NEXT: # %test2 -;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 2 -;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]] -;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3 -;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 4 -;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]] -;CHECK-NEXT: .[[TEST4LABEL:[_0-9A-Za-z]+]]: # %test4 -;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 8 -;CHECK-NEXT: bne 0, .[[OPT4LABEL:[_0-9A-Za-z]+]] -;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit -;CHECK: blr -;CHECK-NEXT: .[[OPT1LABEL]]: -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 2 -;CHECK-NEXT: beq 0, .[[TEST3LABEL]] -;CHECK-NEXT: .[[OPT2LABEL]]: -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 4 -;CHECK-NEXT: beq 0, .[[TEST4LABEL]] -;CHECK-NEXT: .[[OPT3LABEL]]: -;CHECK: andi. 
{{[0-9]+}}, [[TAGREG]], 8 -;CHECK-NEXT: beq 0, .[[EXITLABEL]] -;CHECK-NEXT: .[[OPT4LABEL]]: -;CHECK: b .[[EXITLABEL]] define void @straight_test(i32 %tag) { +; CHECK-LABEL: straight_test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -48(1) +; CHECK-NEXT: mr 30, 3 +; CHECK-NEXT: andi. 3, 30, 1 +; CHECK-NEXT: bc 12, 1, .LBB0_5 +; CHECK-NEXT: # %bb.1: # %test2 +; CHECK-NEXT: andi. 3, 30, 2 +; CHECK-NEXT: bne 0, .LBB0_6 +; CHECK-NEXT: .LBB0_2: # %test3 +; CHECK-NEXT: andi. 3, 30, 4 +; CHECK-NEXT: bne 0, .LBB0_7 +; CHECK-NEXT: .LBB0_3: # %test4 +; CHECK-NEXT: andi. 3, 30, 8 +; CHECK-NEXT: bne 0, .LBB0_8 +; CHECK-NEXT: .LBB0_4: # %exit +; CHECK-NEXT: addi 1, 1, 48 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB0_5: # %optional1 +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: andi. 3, 30, 2 +; CHECK-NEXT: beq 0, .LBB0_2 +; CHECK-NEXT: .LBB0_6: # %optional2 +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: andi. 3, 30, 4 +; CHECK-NEXT: beq 0, .LBB0_3 +; CHECK-NEXT: .LBB0_7: # %optional3 +; CHECK-NEXT: bl c +; CHECK-NEXT: nop +; CHECK-NEXT: bl c +; CHECK-NEXT: nop +; CHECK-NEXT: bl c +; CHECK-NEXT: nop +; CHECK-NEXT: bl c +; CHECK-NEXT: nop +; CHECK-NEXT: andi. 
3, 30, 8 +; CHECK-NEXT: beq 0, .LBB0_4 +; CHECK-NEXT: .LBB0_8: # %optional4 +; CHECK-NEXT: bl d +; CHECK-NEXT: nop +; CHECK-NEXT: bl d +; CHECK-NEXT: nop +; CHECK-NEXT: bl d +; CHECK-NEXT: nop +; CHECK-NEXT: bl d +; CHECK-NEXT: nop +; CHECK-NEXT: b .LBB0_4 entry: br label %test1 test1: @@ -113,29 +158,47 @@ ; The CHECK statements check for the whole string of tests ; and then check that the correct test has been duplicated into the end of ; the optional blocks and that the optional blocks are in the correct order. -;CHECK-LABEL: straight_test_50: ; test1 may have been merged with entry -;CHECK: mr [[TAGREG:[0-9]+]], 3 -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 -;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]] -;CHECK-NEXT: # %test2 -;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 2 -;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]] -;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3 -;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 4 -;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]] -;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit -;CHECK: blr -;CHECK-NEXT: .[[OPT1LABEL]]: -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 2 -;CHECK-NEXT: beq 0, .[[TEST3LABEL]] -;CHECK-NEXT: .[[OPT2LABEL]]: -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 4 -;CHECK-NEXT: beq 0, .[[EXITLABEL]] -;CHECK-NEXT: .[[OPT3LABEL]]: -;CHECK: b .[[EXITLABEL]] define void @straight_test_50(i32 %tag) { +; CHECK-LABEL: straight_test_50: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -48(1) +; CHECK-NEXT: mr 30, 3 +; CHECK-NEXT: andi. 3, 30, 1 +; CHECK-NEXT: bc 12, 1, .LBB1_4 +; CHECK-NEXT: # %bb.1: # %test2 +; CHECK-NEXT: andi. 3, 30, 2 +; CHECK-NEXT: bne 0, .LBB1_5 +; CHECK-NEXT: .LBB1_2: # %test3 +; CHECK-NEXT: andi. 
3, 30, 4 +; CHECK-NEXT: bne 0, .LBB1_6 +; CHECK-NEXT: .LBB1_3: # %exit +; CHECK-NEXT: addi 1, 1, 48 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB1_4: # %optional1 +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: andi. 3, 30, 2 +; CHECK-NEXT: beq 0, .LBB1_2 +; CHECK-NEXT: .LBB1_5: # %optional2 +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: andi. 3, 30, 4 +; CHECK-NEXT: beq 0, .LBB1_3 +; CHECK-NEXT: .LBB1_6: # %optional3 +; CHECK-NEXT: bl c +; CHECK-NEXT: nop +; CHECK-NEXT: b .LBB1_3 entry: br label %test1 test1: @@ -183,51 +246,89 @@ ; optional3 ; exit -;CHECK-LABEL: straight_test_3_instr_test: ; test1 may have been merged with entry -;CHECK: mr [[TAGREG:[0-9]+]], 3 -;CHECK: clrlwi {{[0-9]+}}, [[TAGREG]], 30 -;CHECK-NEXT: cmplwi {{[0-9]+}}, 2 - -;CHECK-O3-NEXT: bne 0, .[[OPT1LABEL:[_0-9A-Za-z]+]] -;CHECK-O3-NEXT: # %test2 -;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29 -;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8 -;CHECK-O3-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]] -;CHECK-O3-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3 -;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27 -;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32 -;CHECK-O3-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]] -;CHECK-O3-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit -;CHECK-O3: blr -;CHECK-O3-NEXT: .[[OPT1LABEL]]: -;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29 -;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8 -;CHECK-O3-NEXT: beq 0, .[[TEST3LABEL]] -;CHECK-O3-NEXT: .[[OPT2LABEL]]: -;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27 -;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32 -;CHECK-O3-NEXT: beq 0, .[[EXITLABEL]] -;CHECK-O3-NEXT: .[[OPT3LABEL]]: -;CHECK-O3: b .[[EXITLABEL]] - -;CHECK-O2-NEXT: beq 0, .[[TEST2LABEL:[_0-9A-Za-z]+]] -;CHECK-O2-NEXT: # %optional1 -;CHECK-O2: .[[TEST2LABEL]]: # %test2 -;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29 -;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 8 -;CHECK-O2-NEXT: beq 0, 
.[[TEST3LABEL:[_0-9A-Za-z]+]] -;CHECK-O2-NEXT: # %optional2 -;CHECK-O2: .[[TEST3LABEL]]: # %test3 -;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27 -;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 32 -;CHECK-O2-NEXT: beq 0, .[[EXITLABEL:[_0-9A-Za-z]+]] -;CHECK-O2-NEXT: # %optional3 -;CHECK-O2: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit -;CHECK-O2: blr - define void @straight_test_3_instr_test(i32 %tag) { +; CHECK-O2-LABEL: straight_test_3_instr_test: +; CHECK-O2: # %bb.0: # %entry +; CHECK-O2-NEXT: mflr 0 +; CHECK-O2-NEXT: .cfi_def_cfa_offset 48 +; CHECK-O2-NEXT: .cfi_offset lr, 16 +; CHECK-O2-NEXT: .cfi_offset r30, -16 +; CHECK-O2-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-O2-NEXT: std 0, 16(1) +; CHECK-O2-NEXT: stdu 1, -48(1) +; CHECK-O2-NEXT: mr 30, 3 +; CHECK-O2-NEXT: clrlwi 3, 30, 30 +; CHECK-O2-NEXT: cmplwi 3, 2 +; CHECK-O2-NEXT: beq 0, .LBB2_2 +; CHECK-O2-NEXT: # %bb.1: # %optional1 +; CHECK-O2-NEXT: bl a +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: .LBB2_2: # %test2 +; CHECK-O2-NEXT: rlwinm 3, 30, 0, 28, 29 +; CHECK-O2-NEXT: cmplwi 3, 8 +; CHECK-O2-NEXT: beq 0, .LBB2_4 +; CHECK-O2-NEXT: # %bb.3: # %optional2 +; CHECK-O2-NEXT: bl b +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: .LBB2_4: # %test3 +; CHECK-O2-NEXT: rlwinm 3, 30, 0, 26, 27 +; CHECK-O2-NEXT: cmplwi 3, 32 +; CHECK-O2-NEXT: beq 0, .LBB2_6 +; CHECK-O2-NEXT: # %bb.5: # %optional3 +; CHECK-O2-NEXT: bl c +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: .LBB2_6: # %exit +; CHECK-O2-NEXT: addi 1, 1, 48 +; CHECK-O2-NEXT: ld 0, 16(1) +; CHECK-O2-NEXT: mtlr 0 +; CHECK-O2-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-O2-NEXT: blr +; +; CHECK-O3-LABEL: straight_test_3_instr_test: +; CHECK-O3: # %bb.0: # %entry +; CHECK-O3-NEXT: mflr 0 +; CHECK-O3-NEXT: .cfi_def_cfa_offset 48 +; CHECK-O3-NEXT: .cfi_offset lr, 16 +; CHECK-O3-NEXT: .cfi_offset r30, -16 +; CHECK-O3-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-O3-NEXT: std 0, 16(1) +; CHECK-O3-NEXT: stdu 1, -48(1) +; CHECK-O3-NEXT: mr 30, 3 +; CHECK-O3-NEXT: clrlwi 3, 30, 
30 +; CHECK-O3-NEXT: cmplwi 3, 2 +; CHECK-O3-NEXT: bne 0, .LBB2_4 +; CHECK-O3-NEXT: # %bb.1: # %test2 +; CHECK-O3-NEXT: rlwinm 3, 30, 0, 28, 29 +; CHECK-O3-NEXT: cmplwi 3, 8 +; CHECK-O3-NEXT: bne 0, .LBB2_5 +; CHECK-O3-NEXT: .LBB2_2: # %test3 +; CHECK-O3-NEXT: rlwinm 3, 30, 0, 26, 27 +; CHECK-O3-NEXT: cmplwi 3, 32 +; CHECK-O3-NEXT: bne 0, .LBB2_6 +; CHECK-O3-NEXT: .LBB2_3: # %exit +; CHECK-O3-NEXT: addi 1, 1, 48 +; CHECK-O3-NEXT: ld 0, 16(1) +; CHECK-O3-NEXT: mtlr 0 +; CHECK-O3-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-O3-NEXT: blr +; CHECK-O3-NEXT: .LBB2_4: # %optional1 +; CHECK-O3-NEXT: bl a +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: rlwinm 3, 30, 0, 28, 29 +; CHECK-O3-NEXT: cmplwi 3, 8 +; CHECK-O3-NEXT: beq 0, .LBB2_2 +; CHECK-O3-NEXT: .LBB2_5: # %optional2 +; CHECK-O3-NEXT: bl b +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: rlwinm 3, 30, 0, 26, 27 +; CHECK-O3-NEXT: cmplwi 3, 32 +; CHECK-O3-NEXT: beq 0, .LBB2_3 +; CHECK-O3-NEXT: .LBB2_6: # %optional3 +; CHECK-O3-NEXT: bl c +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: b .LBB2_3 entry: br label %test1 test1: @@ -274,38 +375,217 @@ ; The CHECK statements check for the whole string of tests and exit block, ; and then check that the correct test has been duplicated into the end of ; the optional blocks and that the optional blocks are in the correct order. -;CHECK-LABEL: loop_test: -;CHECK: add [[TAGPTRREG:[0-9]+]], 3, 4 -;CHECK: .[[LATCHLABEL:[._0-9A-Za-z]+]]: # %for.latch -;CHECK: addi -;CHECK-O2: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check -;CHECK: lwz [[TAGREG:[0-9]+]], 0([[TAGPTRREG]]) -;CHECK-O3: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check -;CHECK: # %bb.{{[0-9]+}}: # %test1 -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 -;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: # %test2 -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 2 -;CHECK-NEXT: bne 0, .[[OPT2LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: .[[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3 -;CHECK: andi. 
{{[0-9]+}}, [[TAGREG]], 4 -;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: .[[TEST4LABEL:[._0-9A-Za-z]+]]: # %{{(test4|optional3)}} -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 8 -;CHECK-NEXT: beq 0, .[[LATCHLABEL]] -;CHECK-NEXT: b .[[OPT4LABEL:[._0-9A-Za-z]+]] -;CHECK: [[OPT1LABEL]] -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 2 -;CHECK-NEXT: beq 0, .[[TEST3LABEL]] -;CHECK-NEXT: .[[OPT2LABEL]] -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 4 -;CHECK-NEXT: beq 0, .[[TEST4LABEL]] -;CHECK-NEXT: .[[OPT3LABEL]] -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 8 -;CHECK-NEXT: beq 0, .[[LATCHLABEL]] -;CHECK: [[OPT4LABEL]]: -;CHECK: b .[[LATCHLABEL]] define void @loop_test(i32* %tags, i32 %count) { +; CHECK-O2-LABEL: loop_test: +; CHECK-O2: # %bb.0: # %entry +; CHECK-O2-NEXT: mflr 0 +; CHECK-O2-NEXT: .cfi_def_cfa_offset 64 +; CHECK-O2-NEXT: .cfi_offset lr, 16 +; CHECK-O2-NEXT: .cfi_offset r28, -32 +; CHECK-O2-NEXT: .cfi_offset r29, -24 +; CHECK-O2-NEXT: .cfi_offset r30, -16 +; CHECK-O2-NEXT: std 28, -32(1) # 8-byte Folded Spill +; CHECK-O2-NEXT: std 29, -24(1) # 8-byte Folded Spill +; CHECK-O2-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-O2-NEXT: std 0, 16(1) +; CHECK-O2-NEXT: stdu 1, -64(1) +; CHECK-O2-NEXT: mr 30, 4 +; CHECK-O2-NEXT: extsw 4, 4 +; CHECK-O2-NEXT: sldi 4, 4, 2 +; CHECK-O2-NEXT: add 29, 3, 4 +; CHECK-O2-NEXT: b .LBB3_2 +; CHECK-O2-NEXT: .p2align 4 +; CHECK-O2-NEXT: .LBB3_1: # %for.latch +; CHECK-O2-NEXT: # +; CHECK-O2-NEXT: addi 30, 30, -1 +; CHECK-O2-NEXT: .LBB3_2: # %for.check +; CHECK-O2-NEXT: # +; CHECK-O2-NEXT: lwz 28, 0(29) +; CHECK-O2-NEXT: cmpwi 30, 0 +; CHECK-O2-NEXT: bc 12, 2, .LBB3_12 +; CHECK-O2-NEXT: # %bb.3: # %for.check +; CHECK-O2-NEXT: # +; CHECK-O2-NEXT: cmpwi 28, 0 +; CHECK-O2-NEXT: bc 4, 2, .LBB3_12 +; CHECK-O2-NEXT: # %bb.4: # %test1 +; CHECK-O2-NEXT: # +; CHECK-O2-NEXT: andi. 3, 28, 1 +; CHECK-O2-NEXT: bc 12, 1, .LBB3_8 +; CHECK-O2-NEXT: # %bb.5: # %test2 +; CHECK-O2-NEXT: # +; CHECK-O2-NEXT: andi. 
3, 28, 2 +; CHECK-O2-NEXT: bne 0, .LBB3_9 +; CHECK-O2-NEXT: .LBB3_6: # %test3 +; CHECK-O2-NEXT: # +; CHECK-O2-NEXT: andi. 3, 28, 4 +; CHECK-O2-NEXT: bne 0, .LBB3_10 +; CHECK-O2-NEXT: .LBB3_7: # %test4 +; CHECK-O2-NEXT: # +; CHECK-O2-NEXT: andi. 3, 28, 8 +; CHECK-O2-NEXT: beq 0, .LBB3_1 +; CHECK-O2-NEXT: b .LBB3_11 +; CHECK-O2-NEXT: .p2align 4 +; CHECK-O2-NEXT: .LBB3_8: # %optional1 +; CHECK-O2-NEXT: # +; CHECK-O2-NEXT: bl a +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: bl a +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: bl a +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: bl a +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: andi. 3, 28, 2 +; CHECK-O2-NEXT: beq 0, .LBB3_6 +; CHECK-O2-NEXT: .LBB3_9: # %optional2 +; CHECK-O2-NEXT: # +; CHECK-O2-NEXT: bl b +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: bl b +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: bl b +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: bl b +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: andi. 3, 28, 4 +; CHECK-O2-NEXT: beq 0, .LBB3_7 +; CHECK-O2-NEXT: .LBB3_10: # %optional3 +; CHECK-O2-NEXT: # +; CHECK-O2-NEXT: bl c +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: bl c +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: bl c +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: bl c +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: andi. 
3, 28, 8 +; CHECK-O2-NEXT: beq 0, .LBB3_1 +; CHECK-O2-NEXT: .LBB3_11: # %optional4 +; CHECK-O2-NEXT: # +; CHECK-O2-NEXT: bl d +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: bl d +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: bl d +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: bl d +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: b .LBB3_1 +; CHECK-O2-NEXT: .LBB3_12: # %exit +; CHECK-O2-NEXT: addi 1, 1, 64 +; CHECK-O2-NEXT: ld 0, 16(1) +; CHECK-O2-NEXT: mtlr 0 +; CHECK-O2-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-O2-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; CHECK-O2-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; CHECK-O2-NEXT: blr +; +; CHECK-O3-LABEL: loop_test: +; CHECK-O3: # %bb.0: # %entry +; CHECK-O3-NEXT: mflr 0 +; CHECK-O3-NEXT: .cfi_def_cfa_offset 64 +; CHECK-O3-NEXT: .cfi_offset lr, 16 +; CHECK-O3-NEXT: .cfi_offset r28, -32 +; CHECK-O3-NEXT: .cfi_offset r29, -24 +; CHECK-O3-NEXT: .cfi_offset r30, -16 +; CHECK-O3-NEXT: std 28, -32(1) # 8-byte Folded Spill +; CHECK-O3-NEXT: std 29, -24(1) # 8-byte Folded Spill +; CHECK-O3-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-O3-NEXT: std 0, 16(1) +; CHECK-O3-NEXT: stdu 1, -64(1) +; CHECK-O3-NEXT: mr 30, 4 +; CHECK-O3-NEXT: extsw 4, 4 +; CHECK-O3-NEXT: sldi 4, 4, 2 +; CHECK-O3-NEXT: add 29, 3, 4 +; CHECK-O3-NEXT: lwz 28, 0(29) +; CHECK-O3-NEXT: cmpwi 30, 0 +; CHECK-O3-NEXT: bc 4, 2, .LBB3_2 +; CHECK-O3-NEXT: b .LBB3_11 +; CHECK-O3-NEXT: .p2align 4 +; CHECK-O3-NEXT: .LBB3_1: # %for.latch +; CHECK-O3-NEXT: # +; CHECK-O3-NEXT: addi 30, 30, -1 +; CHECK-O3-NEXT: lwz 28, 0(29) +; CHECK-O3-NEXT: cmpwi 30, 0 +; CHECK-O3-NEXT: bc 12, 2, .LBB3_11 +; CHECK-O3-NEXT: .LBB3_2: # %for.check +; CHECK-O3-NEXT: # +; CHECK-O3-NEXT: cmpwi 28, 0 +; CHECK-O3-NEXT: bc 4, 2, .LBB3_11 +; CHECK-O3-NEXT: # %bb.3: # %test1 +; CHECK-O3-NEXT: # +; CHECK-O3-NEXT: andi. 3, 28, 1 +; CHECK-O3-NEXT: bc 12, 1, .LBB3_7 +; CHECK-O3-NEXT: # %bb.4: # %test2 +; CHECK-O3-NEXT: # +; CHECK-O3-NEXT: andi. 
3, 28, 2 +; CHECK-O3-NEXT: bne 0, .LBB3_8 +; CHECK-O3-NEXT: .LBB3_5: # %test3 +; CHECK-O3-NEXT: # +; CHECK-O3-NEXT: andi. 3, 28, 4 +; CHECK-O3-NEXT: bne 0, .LBB3_9 +; CHECK-O3-NEXT: .LBB3_6: # %test4 +; CHECK-O3-NEXT: # +; CHECK-O3-NEXT: andi. 3, 28, 8 +; CHECK-O3-NEXT: beq 0, .LBB3_1 +; CHECK-O3-NEXT: b .LBB3_10 +; CHECK-O3-NEXT: .p2align 4 +; CHECK-O3-NEXT: .LBB3_7: # %optional1 +; CHECK-O3-NEXT: # +; CHECK-O3-NEXT: bl a +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: bl a +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: bl a +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: bl a +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: andi. 3, 28, 2 +; CHECK-O3-NEXT: beq 0, .LBB3_5 +; CHECK-O3-NEXT: .LBB3_8: # %optional2 +; CHECK-O3-NEXT: # +; CHECK-O3-NEXT: bl b +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: bl b +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: bl b +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: bl b +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: andi. 3, 28, 4 +; CHECK-O3-NEXT: beq 0, .LBB3_6 +; CHECK-O3-NEXT: .LBB3_9: # %optional3 +; CHECK-O3-NEXT: # +; CHECK-O3-NEXT: bl c +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: bl c +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: bl c +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: bl c +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: andi. 3, 28, 8 +; CHECK-O3-NEXT: beq 0, .LBB3_1 +; CHECK-O3-NEXT: .LBB3_10: # %optional4 +; CHECK-O3-NEXT: # +; CHECK-O3-NEXT: bl d +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: bl d +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: bl d +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: bl d +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: b .LBB3_1 +; CHECK-O3-NEXT: .LBB3_11: # %exit +; CHECK-O3-NEXT: addi 1, 1, 64 +; CHECK-O3-NEXT: ld 0, 16(1) +; CHECK-O3-NEXT: mtlr 0 +; CHECK-O3-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-O3-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; CHECK-O3-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; CHECK-O3-NEXT: blr entry: br label %for.check for.check: @@ -368,25 +648,89 @@ ; test2 and copied. 
The purpose here is to make sure that the tail-duplication ; code is independent of the outlining code, which works by choosing the ; "unavoidable" blocks. -; CHECK-LABEL: avoidable_test: -; CHECK: # %bb.{{[0-9]+}}: # %entry -; CHECK: andi. -; CHECK: # %bb.{{[0-9]+}}: # %test2 -; Make sure then2 falls through from test2 -; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} -; CHECK: # %bb.{{[0-9]+}}: # %then2 -; CHECK: andi. {{[0-9]+}}, {{[0-9]+}}, 4 -; CHECK: # %else1 -; CHECK: bl a -; CHECK: bl a ; Make sure then2 was copied into else1 -; CHECK: andi. {{[0-9]+}}, {{[0-9]+}}, 4 -; CHECK: # %end1 -; CHECK: bl d -; CHECK: # %else2 -; CHECK: bl c -; CHECK: # %end2 define void @avoidable_test(i32 %tag) { +; CHECK-O2-LABEL: avoidable_test: +; CHECK-O2: # %bb.0: # %entry +; CHECK-O2-NEXT: mflr 0 +; CHECK-O2-NEXT: std 0, 16(1) +; CHECK-O2-NEXT: stdu 1, -48(1) +; CHECK-O2-NEXT: .cfi_def_cfa_offset 48 +; CHECK-O2-NEXT: .cfi_offset lr, 16 +; CHECK-O2-NEXT: andi. 4, 3, 1 +; CHECK-O2-NEXT: bc 12, 1, .LBB4_3 +; CHECK-O2-NEXT: # %bb.1: # %test2 +; CHECK-O2-NEXT: andi. 4, 3, 2 +; CHECK-O2-NEXT: bne 0, .LBB4_5 +; CHECK-O2-NEXT: # %bb.2: # %then2 +; CHECK-O2-NEXT: andi. 3, 3, 4 +; CHECK-O2-NEXT: beq 0, .LBB4_6 +; CHECK-O2-NEXT: b .LBB4_4 +; CHECK-O2-NEXT: .LBB4_3: # %else1 +; CHECK-O2-NEXT: std 3, 40(1) # 8-byte Folded Spill +; CHECK-O2-NEXT: bl a +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: bl a +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: ld 3, 40(1) # 8-byte Folded Reload +; CHECK-O2-NEXT: andi. 
3, 3, 4 +; CHECK-O2-NEXT: beq 0, .LBB4_6 +; CHECK-O2-NEXT: .LBB4_4: # %end1 +; CHECK-O2-NEXT: bl d +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: b .LBB4_6 +; CHECK-O2-NEXT: .LBB4_5: # %else2 +; CHECK-O2-NEXT: bl c +; CHECK-O2-NEXT: nop +; CHECK-O2-NEXT: .LBB4_6: # %end2 +; CHECK-O2-NEXT: addi 1, 1, 48 +; CHECK-O2-NEXT: ld 0, 16(1) +; CHECK-O2-NEXT: mtlr 0 +; CHECK-O2-NEXT: blr +; +; CHECK-O3-LABEL: avoidable_test: +; CHECK-O3: # %bb.0: # %entry +; CHECK-O3-NEXT: mflr 0 +; CHECK-O3-NEXT: std 0, 16(1) +; CHECK-O3-NEXT: stdu 1, -48(1) +; CHECK-O3-NEXT: .cfi_def_cfa_offset 48 +; CHECK-O3-NEXT: .cfi_offset lr, 16 +; CHECK-O3-NEXT: andi. 4, 3, 1 +; CHECK-O3-NEXT: bc 12, 1, .LBB4_4 +; CHECK-O3-NEXT: # %bb.1: # %test2 +; CHECK-O3-NEXT: andi. 4, 3, 2 +; CHECK-O3-NEXT: bne 0, .LBB4_6 +; CHECK-O3-NEXT: # %bb.2: # %then2 +; CHECK-O3-NEXT: andi. 3, 3, 4 +; CHECK-O3-NEXT: bne 0, .LBB4_5 +; CHECK-O3-NEXT: .LBB4_3: # %end2 +; CHECK-O3-NEXT: addi 1, 1, 48 +; CHECK-O3-NEXT: ld 0, 16(1) +; CHECK-O3-NEXT: mtlr 0 +; CHECK-O3-NEXT: blr +; CHECK-O3-NEXT: .LBB4_4: # %else1 +; CHECK-O3-NEXT: std 3, 40(1) # 8-byte Folded Spill +; CHECK-O3-NEXT: bl a +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: bl a +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: ld 3, 40(1) # 8-byte Folded Reload +; CHECK-O3-NEXT: andi. 3, 3, 4 +; CHECK-O3-NEXT: beq 0, .LBB4_3 +; CHECK-O3-NEXT: .LBB4_5: # %end1 +; CHECK-O3-NEXT: bl d +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: addi 1, 1, 48 +; CHECK-O3-NEXT: ld 0, 16(1) +; CHECK-O3-NEXT: mtlr 0 +; CHECK-O3-NEXT: blr +; CHECK-O3-NEXT: .LBB4_6: # %else2 +; CHECK-O3-NEXT: bl c +; CHECK-O3-NEXT: nop +; CHECK-O3-NEXT: addi 1, 1, 48 +; CHECK-O3-NEXT: ld 0, 16(1) +; CHECK-O3-NEXT: mtlr 0 +; CHECK-O3-NEXT: blr entry: br label %test1 test1: @@ -415,7 +759,6 @@ ret void } -; CHECK-LABEL: trellis_test ; The number in the block labels is the expected block frequency given the ; probabilities annotated. There is a conflict in the b;c->d;e trellis that ; should be resolved as c->e;b->d. 
@@ -423,18 +766,95 @@ ; The f;g->h;i trellis should be resolved as f->i;g->h. ; The h;i->j;ret trellis contains a triangle edge, and should be resolved as ; h->j->ret -; CHECK: # %bb.{{[0-9]+}}: # %entry -; CHECK: # %bb.{{[0-9]+}}: # %c10 -; CHECK: # %e9 -; CHECK: # %g10 -; CHECK: # %h10 -; CHECK: # %j8 -; CHECK: # %ret -; CHECK: # %b6 -; CHECK: # %d7 -; CHECK: # %f6 -; CHECK: # %i6 define void @trellis_test(i32 %tag) { +; CHECK-LABEL: trellis_test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -48(1) +; CHECK-NEXT: mr 30, 3 +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: andi. 3, 30, 3 +; CHECK-NEXT: bne 0, .LBB5_7 +; CHECK-NEXT: # %bb.1: # %c10 +; CHECK-NEXT: bl c +; CHECK-NEXT: nop +; CHECK-NEXT: bl c +; CHECK-NEXT: nop +; CHECK-NEXT: andi. 3, 30, 12 +; CHECK-NEXT: bne 0, .LBB5_8 +; CHECK-NEXT: .LBB5_2: # %e9 +; CHECK-NEXT: bl e +; CHECK-NEXT: nop +; CHECK-NEXT: bl e +; CHECK-NEXT: nop +; CHECK-NEXT: andi. 3, 30, 48 +; CHECK-NEXT: bne 0, .LBB5_9 +; CHECK-NEXT: .LBB5_3: # %g10 +; CHECK-NEXT: bl g +; CHECK-NEXT: nop +; CHECK-NEXT: bl g +; CHECK-NEXT: nop +; CHECK-NEXT: andi. 
3, 30, 192 +; CHECK-NEXT: beq 0, .LBB5_10 +; CHECK-NEXT: .LBB5_4: # %h10 +; CHECK-NEXT: bl h +; CHECK-NEXT: nop +; CHECK-NEXT: bl h +; CHECK-NEXT: nop +; CHECK-NEXT: rlwinm 3, 30, 0, 22, 23 +; CHECK-NEXT: cmplwi 3, 512 +; CHECK-NEXT: beq 0, .LBB5_6 +; CHECK-NEXT: .LBB5_5: # %j8 +; CHECK-NEXT: bl j +; CHECK-NEXT: nop +; CHECK-NEXT: bl j +; CHECK-NEXT: nop +; CHECK-NEXT: .LBB5_6: # %ret +; CHECK-NEXT: addi 1, 1, 48 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB5_7: # %b6 +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: rlwinm 3, 30, 0, 28, 29 +; CHECK-NEXT: cmplwi 3, 8 +; CHECK-NEXT: beq 0, .LBB5_2 +; CHECK-NEXT: .LBB5_8: # %d7 +; CHECK-NEXT: bl d +; CHECK-NEXT: nop +; CHECK-NEXT: bl d +; CHECK-NEXT: nop +; CHECK-NEXT: rlwinm 3, 30, 0, 26, 27 +; CHECK-NEXT: cmplwi 3, 32 +; CHECK-NEXT: beq 0, .LBB5_3 +; CHECK-NEXT: .LBB5_9: # %f6 +; CHECK-NEXT: bl f +; CHECK-NEXT: nop +; CHECK-NEXT: bl f +; CHECK-NEXT: nop +; CHECK-NEXT: rlwinm 3, 30, 0, 24, 25 +; CHECK-NEXT: cmplwi 3, 128 +; CHECK-NEXT: bne 0, .LBB5_4 +; CHECK-NEXT: .LBB5_10: # %i6 +; CHECK-NEXT: bl i +; CHECK-NEXT: nop +; CHECK-NEXT: bl i +; CHECK-NEXT: nop +; CHECK-NEXT: andi. 3, 30, 768 +; CHECK-NEXT: bne 0, .LBB5_5 +; CHECK-NEXT: b .LBB5_6 entry: br label %a16 a16: @@ -506,18 +926,76 @@ ; of both F and G. The basic trellis algorithm picks the F->G edge, but after ; checking, it's profitable to duplicate G into F. The weights here are not ; really important. They are there to help make the test stable. 
-; CHECK-LABEL: trellis_then_dup_test -; CHECK: # %bb.{{[0-9]+}}: # %entry -; CHECK: # %bb.{{[0-9]+}}: # %b -; CHECK: # %d -; CHECK: # %g -; CHECK: # %ret1 -; CHECK: # %c -; CHECK: # %e -; CHECK: # %f -; CHECK: # %ret2 -; CHECK: # %ret define void @trellis_then_dup_test(i32 %tag) { +; CHECK-LABEL: trellis_then_dup_test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -48(1) +; CHECK-NEXT: mr 30, 3 +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: andi. 3, 30, 3 +; CHECK-NEXT: bne 0, .LBB6_5 +; CHECK-NEXT: # %bb.1: # %b +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: rlwinm 3, 30, 0, 28, 29 +; CHECK-NEXT: cmplwi 3, 8 +; CHECK-NEXT: bne 0, .LBB6_6 +; CHECK-NEXT: .LBB6_2: # %d +; CHECK-NEXT: bl d +; CHECK-NEXT: nop +; CHECK-NEXT: bl d +; CHECK-NEXT: nop +; CHECK-NEXT: rlwinm 3, 30, 0, 26, 27 +; CHECK-NEXT: cmplwi 3, 32 +; CHECK-NEXT: bne 0, .LBB6_7 +; CHECK-NEXT: .LBB6_3: # %g +; CHECK-NEXT: andi. 3, 30, 192 +; CHECK-NEXT: bne 0, .LBB6_8 +; CHECK-NEXT: .LBB6_4: # %ret1 +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: b .LBB6_9 +; CHECK-NEXT: .LBB6_5: # %c +; CHECK-NEXT: bl c +; CHECK-NEXT: nop +; CHECK-NEXT: bl c +; CHECK-NEXT: nop +; CHECK-NEXT: andi. 3, 30, 12 +; CHECK-NEXT: beq 0, .LBB6_2 +; CHECK-NEXT: .LBB6_6: # %e +; CHECK-NEXT: bl e +; CHECK-NEXT: nop +; CHECK-NEXT: bl e +; CHECK-NEXT: nop +; CHECK-NEXT: andi. 3, 30, 48 +; CHECK-NEXT: beq 0, .LBB6_3 +; CHECK-NEXT: .LBB6_7: # %f +; CHECK-NEXT: bl f +; CHECK-NEXT: nop +; CHECK-NEXT: bl f +; CHECK-NEXT: nop +; CHECK-NEXT: andi. 
3, 30, 192 +; CHECK-NEXT: beq 0, .LBB6_4 +; CHECK-NEXT: .LBB6_8: # %ret2 +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: .LBB6_9: # %ret +; CHECK-NEXT: addi 1, 1, 48 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-NEXT: blr entry: br label %a a: @@ -570,14 +1048,60 @@ ; Verify that we did not mis-identify triangle trellises if it is not ; really a triangle. -; CHECK-LABEL: trellis_no_triangle -; CHECK: # %bb.{{[0-9]+}}: # %entry -; CHECK: # %bb.{{[0-9]+}}: # %b -; CHECK: # %d -; CHECK: # %ret -; CHECK: # %c -; CHECK: # %e define void @trellis_no_triangle(i32 %tag) { +; CHECK-LABEL: trellis_no_triangle: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -48(1) +; CHECK-NEXT: mr 30, 3 +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: bl a +; CHECK-NEXT: nop +; CHECK-NEXT: andi. 3, 30, 3 +; CHECK-NEXT: bne 0, .LBB7_4 +; CHECK-NEXT: # %bb.1: # %b +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: rlwinm 3, 30, 0, 28, 29 +; CHECK-NEXT: cmplwi 3, 8 +; CHECK-NEXT: bne 0, .LBB7_5 +; CHECK-NEXT: .LBB7_2: # %d +; CHECK-NEXT: bl d +; CHECK-NEXT: nop +; CHECK-NEXT: bl d +; CHECK-NEXT: nop +; CHECK-NEXT: rlwinm 3, 30, 0, 26, 27 +; CHECK-NEXT: cmplwi 3, 32 +; CHECK-NEXT: bne 0, .LBB7_5 +; CHECK-NEXT: .LBB7_3: # %ret +; CHECK-NEXT: bl f +; CHECK-NEXT: nop +; CHECK-NEXT: addi 1, 1, 48 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB7_4: # %c +; CHECK-NEXT: bl c +; CHECK-NEXT: nop +; CHECK-NEXT: bl c +; CHECK-NEXT: nop +; CHECK-NEXT: andi. 
3, 30, 12 +; CHECK-NEXT: beq 0, .LBB7_2 +; CHECK-NEXT: .LBB7_5: # %e +; CHECK-NEXT: bl e +; CHECK-NEXT: nop +; CHECK-NEXT: bl e +; CHECK-NEXT: nop +; CHECK-NEXT: b .LBB7_3 entry: br label %a a: Index: llvm/test/CodeGen/X86/atom-fixup-lea2.ll =================================================================== --- llvm/test/CodeGen/X86/atom-fixup-lea2.ll +++ llvm/test/CodeGen/X86/atom-fixup-lea2.ll @@ -1,11 +1,5 @@ -; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s - -; CHECK:%bb.5 -; CHECK-NEXT:leal -; CHECK-NEXT:leal -; CHECK-NEXT:leal -; CHECK-NEXT:movl - +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s ; Test for fixup lea pre-emit pass. LEA instructions should be substituted for ; ADD instructions which compute the address and index of the load because they @@ -35,6 +29,54 @@ %struct.node_t = type { i32, i32, i32, i32, i32* } define i32 @test() { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: leal -{{[0-9]+}}(%esp), %esp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset %esi, -12 +; CHECK-NEXT: .cfi_offset %edi, -8 +; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, (%esp) +; CHECK-NEXT: calll getnode +; CHECK-NEXT: leal -{{[0-9]+}}(%esp), %esp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testl %ecx, %ecx +; CHECK-NEXT: je .LBB0_6 +; CHECK-NEXT: # %bb.1: # %land.lhs.true +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: testl %edx, %edx +; CHECK-NEXT: jle .LBB0_6 +; CHECK-NEXT: # %bb.2: # %land.lhs.true2 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: jle .LBB0_6 +; CHECK-NEXT: # %bb.3: # %land.lhs.true4 +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: jle .LBB0_6 +; CHECK-NEXT: # %bb.4: # %land.lhs.true7 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jle .LBB0_6 +; CHECK-NEXT: # %bb.5: # %if.then +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; CHECK-NEXT: leal (%edx,%ecx), %edx +; CHECK-NEXT: leal (%esi,%edi), %esi +; CHECK-NEXT: movl (%edx,%esi,4), %eax +; CHECK-NEXT: .LBB0_6: # %if.end +; CHECK-NEXT: leal {{[0-9]+}}(%esp), %esp +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: %n = alloca %struct.node_t, align 4 call void bitcast (void (%struct.node_t*, ...)* @getnode to void (%struct.node_t*)*)(%struct.node_t* sret %n) Index: llvm/test/CodeGen/X86/block-placement.ll =================================================================== --- llvm/test/CodeGen/X86/block-placement.ll +++ llvm/test/CodeGen/X86/block-placement.ll @@ -1068,7 +1068,6 @@ ; CHECK-LABEL: test_cold_calls: ; CHECK: %entry ; CHECK: %else -; CHECK: %exit ; CHECK: %then entry: Index: llvm/test/CodeGen/X86/bmi.ll =================================================================== --- llvm/test/CodeGen/X86/bmi.ll +++ llvm/test/CodeGen/X86/bmi.ll @@ -1056,31 +1056,39 @@ define i32 @blsr32_branch(i32 %x) { ; X86-LABEL: blsr32_branch: ; X86: # %bb.0: -; X86-NEXT: pushl %esi +; X86-NEXT: pushl %eax ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %esi, -8 -; X86-NEXT: blsrl {{[0-9]+}}(%esp), %esi -; X86-NEXT: jne .LBB46_2 -; X86-NEXT: # %bb.1: +; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax +; X86-NEXT: je .LBB46_1 +; X86-NEXT: # %bb.2: +; X86-NEXT: popl %ecx +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +; X86-NEXT: .LBB46_1: +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: calll bar -; X86-NEXT: 
.LBB46_2: -; X86-NEXT: movl %esi, %eax -; X86-NEXT: popl %esi +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: popl %ecx ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X64-LABEL: blsr32_branch: ; X64: # %bb.0: -; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: blsrl %edi, %eax +; X64-NEXT: je .LBB46_1 +; X64-NEXT: # %bb.2: +; X64-NEXT: popq %rcx +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; X64-NEXT: .LBB46_1: ; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: .cfi_offset %rbx, -16 -; X64-NEXT: blsrl %edi, %ebx -; X64-NEXT: jne .LBB46_2 -; X64-NEXT: # %bb.1: +; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: callq bar -; X64-NEXT: .LBB46_2: -; X64-NEXT: movl %ebx, %eax -; X64-NEXT: popq %rbx +; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; X64-NEXT: popq %rcx ; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq %tmp = sub i32 %x, 1 @@ -1096,46 +1104,51 @@ define i64 @blsr64_branch(i64 %x) { ; X86-LABEL: blsr64_branch: ; X86: # %bb.0: -; X86-NEXT: pushl %edi -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: pushl %esi -; X86-NEXT: .cfi_def_cfa_offset 12 -; X86-NEXT: .cfi_offset %esi, -12 -; X86-NEXT: .cfi_offset %edi, -8 -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: subl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %eax, %esi -; X86-NEXT: addl $-1, %esi -; X86-NEXT: movl %ecx, %edi -; X86-NEXT: adcl $-1, %edi -; X86-NEXT: andl %eax, %esi -; X86-NEXT: andl %ecx, %edi -; X86-NEXT: movl %esi, %eax -; X86-NEXT: orl %edi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: addl $-1, %eax +; X86-NEXT: movl %esi, %edx +; X86-NEXT: adcl $-1, %edx +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: andl %esi, %edx +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: orl %edx, %ecx ; X86-NEXT: jne .LBB47_2 ; X86-NEXT: # 
%bb.1: +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, (%esp) # 4-byte Spill ; X86-NEXT: calll bar +; X86-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: .LBB47_2: -; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl %edi, %edx -; X86-NEXT: popl %esi +; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: popl %edi +; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X64-LABEL: blsr64_branch: ; X64: # %bb.0: -; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: blsrq %rdi, %rax +; X64-NEXT: je .LBB47_1 +; X64-NEXT: # %bb.2: +; X64-NEXT: popq %rcx +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; X64-NEXT: .LBB47_1: ; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: .cfi_offset %rbx, -16 -; X64-NEXT: blsrq %rdi, %rbx -; X64-NEXT: jne .LBB47_2 -; X64-NEXT: # %bb.1: +; X64-NEXT: movq %rax, (%rsp) # 8-byte Spill ; X64-NEXT: callq bar -; X64-NEXT: .LBB47_2: -; X64-NEXT: movq %rbx, %rax -; X64-NEXT: popq %rbx +; X64-NEXT: movq (%rsp), %rax # 8-byte Reload +; X64-NEXT: popq %rcx ; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq %tmp = sub i64 %x, 1 @@ -1151,31 +1164,39 @@ define i32 @blsi32_branch(i32 %x) { ; X86-LABEL: blsi32_branch: ; X86: # %bb.0: -; X86-NEXT: pushl %esi +; X86-NEXT: pushl %eax ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %esi, -8 -; X86-NEXT: blsil {{[0-9]+}}(%esp), %esi -; X86-NEXT: jne .LBB48_2 -; X86-NEXT: # %bb.1: +; X86-NEXT: blsil {{[0-9]+}}(%esp), %eax +; X86-NEXT: je .LBB48_1 +; X86-NEXT: # %bb.2: +; X86-NEXT: popl %ecx +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +; X86-NEXT: .LBB48_1: +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: calll bar -; X86-NEXT: .LBB48_2: -; X86-NEXT: movl %esi, %eax -; X86-NEXT: popl %esi +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: popl %ecx ; 
X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X64-LABEL: blsi32_branch: ; X64: # %bb.0: -; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %rax ; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: .cfi_offset %rbx, -16 -; X64-NEXT: blsil %edi, %ebx -; X64-NEXT: jne .LBB48_2 -; X64-NEXT: # %bb.1: +; X64-NEXT: blsil %edi, %eax +; X64-NEXT: je .LBB48_1 +; X64-NEXT: # %bb.2: +; X64-NEXT: popq %rcx +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; X64-NEXT: .LBB48_1: +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: callq bar -; X64-NEXT: .LBB48_2: -; X64-NEXT: movl %ebx, %eax -; X64-NEXT: popq %rbx +; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; X64-NEXT: popq %rcx ; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq %tmp = sub i32 0, %x @@ -1191,46 +1212,51 @@ define i64 @blsi64_branch(i64 %x) { ; X86-LABEL: blsi64_branch: ; X86: # %bb.0: -; X86-NEXT: pushl %edi -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: pushl %esi -; X86-NEXT: .cfi_def_cfa_offset 12 -; X86-NEXT: .cfi_offset %esi, -12 -; X86-NEXT: .cfi_offset %edi, -8 -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: subl $8, %esp +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: movl %eax, %edi -; X86-NEXT: negl %edi -; X86-NEXT: sbbl %ecx, %esi -; X86-NEXT: andl %ecx, %esi -; X86-NEXT: andl %eax, %edi -; X86-NEXT: movl %edi, %eax -; X86-NEXT: orl %esi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: negl %eax +; X86-NEXT: sbbl %esi, %edx +; X86-NEXT: andl %esi, %edx +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: orl %edx, %ecx ; X86-NEXT: jne .LBB49_2 ; X86-NEXT: # %bb.1: +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: calll bar +; 
X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: .LBB49_2: -; X86-NEXT: movl %edi, %eax -; X86-NEXT: movl %esi, %edx -; X86-NEXT: popl %esi +; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: popl %edi +; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X64-LABEL: blsi64_branch: ; X64: # %bb.0: -; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: blsiq %rdi, %rax +; X64-NEXT: je .LBB49_1 +; X64-NEXT: # %bb.2: +; X64-NEXT: popq %rcx +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; X64-NEXT: .LBB49_1: ; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: .cfi_offset %rbx, -16 -; X64-NEXT: blsiq %rdi, %rbx -; X64-NEXT: jne .LBB49_2 -; X64-NEXT: # %bb.1: +; X64-NEXT: movq %rax, (%rsp) # 8-byte Spill ; X64-NEXT: callq bar -; X64-NEXT: .LBB49_2: -; X64-NEXT: movq %rbx, %rax -; X64-NEXT: popq %rbx +; X64-NEXT: movq (%rsp), %rax # 8-byte Reload +; X64-NEXT: popq %rcx ; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq %tmp = sub i64 0, %x Index: llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll =================================================================== --- llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll +++ llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll @@ -12,70 +12,67 @@ ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: pushq %r15 ; CHECK-NEXT: pushq %r14 -; CHECK-NEXT: pushq %r13 ; CHECK-NEXT: pushq %r12 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: movl %edx, %ebx -; CHECK-NEXT: movl %esi, %r12d -; CHECK-NEXT: movq %rdi, %r15 +; CHECK-NEXT: subq $16, %rsp +; CHECK-NEXT: movl %edx, %r14d +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movq %rdi, %rbp ; CHECK-NEXT: callq c -; CHECK-NEXT: movl %eax, %r13d -; CHECK-NEXT: movq %r15, %rdi +; CHECK-NEXT: movl %eax, %r15d +; CHECK-NEXT: movq %rbp, %rdi ; CHECK-NEXT: callq l ; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: je .LBB0_1 ; CHECK-NEXT: 
.LBB0_10: # %cleanup -; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: addq $16, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 -; CHECK-NEXT: popq %r13 ; CHECK-NEXT: popq %r14 ; CHECK-NEXT: popq %r15 ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB0_1: # %if.end -; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %ebx, %eax +; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: cmpl $0, {{.*}}(%rip) +; CHECK-NEXT: # implicit-def: $r12d ; CHECK-NEXT: # implicit-def: $ebx -; CHECK-NEXT: # implicit-def: $r14d ; CHECK-NEXT: je .LBB0_4 ; CHECK-NEXT: # %bb.2: # %if.then4 -; CHECK-NEXT: movslq %r12d, %rdi +; CHECK-NEXT: movslq %eax, %rdi ; CHECK-NEXT: callq m -; CHECK-NEXT: # implicit-def: $ebx +; CHECK-NEXT: # implicit-def: $r12d ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: .LBB0_3: # %r ; CHECK-NEXT: callq c -; CHECK-NEXT: movl %ebp, %r14d +; CHECK-NEXT: movl %ebp, %ebx ; CHECK-NEXT: .LBB0_4: # %if.end8 -; CHECK-NEXT: movl %ebx, %edi +; CHECK-NEXT: movl %r12d, %edi ; CHECK-NEXT: callq i ; CHECK-NEXT: movl %eax, %ebp -; CHECK-NEXT: orl %r14d, %ebp -; CHECK-NEXT: testl %r13d, %r13d +; CHECK-NEXT: orl %ebx, %ebp +; CHECK-NEXT: testl %r15d, %r15d ; CHECK-NEXT: je .LBB0_6 ; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: andl $4, %ebx +; CHECK-NEXT: andl $4, %r12d ; CHECK-NEXT: jmp .LBB0_3 ; CHECK-NEXT: .LBB0_6: # %if.end12 ; CHECK-NEXT: testl %ebp, %ebp ; CHECK-NEXT: je .LBB0_9 ; CHECK-NEXT: # %bb.7: # %if.then14 -; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: jmp .LBB0_10 ; CHECK-NEXT: .Ltmp0: # Block address taken ; CHECK-NEXT: .LBB0_8: # %if.then20.critedge ; CHECK-NEXT: movl {{.*}}(%rip), %edi -; CHECK-NEXT: movslq %eax, %rcx +; CHECK-NEXT: movslq %r14d, %rcx ; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: movq %r15, %rdx -; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: addq 
$16, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 -; CHECK-NEXT: popq %r13 ; CHECK-NEXT: popq %r14 ; CHECK-NEXT: popq %r15 ; CHECK-NEXT: popq %rbp Index: llvm/test/CodeGen/X86/cgp-usubo.ll =================================================================== --- llvm/test/CodeGen/X86/cgp-usubo.ll +++ llvm/test/CodeGen/X86/cgp-usubo.ll @@ -162,36 +162,34 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, i64* %p, i1 %cond) nounwind { ; CHECK-LABEL: usubo_ult_cmp_dominates_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: pushq %r15 -; CHECK-NEXT: pushq %r14 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: movl %ecx, %ebp -; CHECK-NEXT: testb $1, %bpl +; CHECK-NEXT: subq $32, %rsp +; CHECK-NEXT: testb $1, %cl ; CHECK-NEXT: je .LBB9_2 ; CHECK-NEXT: # %bb.1: # %t -; CHECK-NEXT: movq %rdx, %r14 -; CHECK-NEXT: movq %rsi, %r15 -; CHECK-NEXT: movq %rdi, %rbx -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: cmpq %rsi, %rbx -; CHECK-NEXT: setb %dil +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: setb %al +; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movq %rsi, %rbx ; CHECK-NEXT: callq call -; CHECK-NEXT: subq %r15, %rbx +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; CHECK-NEXT: subq %rbx, %rsi ; CHECK-NEXT: jae .LBB9_2 ; CHECK-NEXT: # %bb.4: # %end ; CHECK-NEXT: setb %al -; CHECK-NEXT: movq %rbx, (%r14) +; CHECK-NEXT: movq %rsi, (%rdx) ; CHECK-NEXT: jmp .LBB9_3 ; CHECK-NEXT: .LBB9_2: # %f -; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: .LBB9_3: # %f -; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: addq $32, %rsp ; CHECK-NEXT: 
popq %rbx -; CHECK-NEXT: popq %r14 -; CHECK-NEXT: popq %r15 -; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq entry: br i1 %cond, label %t, label %f Index: llvm/test/CodeGen/X86/csr-split.ll =================================================================== --- llvm/test/CodeGen/X86/csr-split.ll +++ llvm/test/CodeGen/X86/csr-split.ll @@ -75,7 +75,6 @@ ; CHECK-NEXT: testq %rdi, %rdi ; CHECK-NEXT: je .LBB1_2 ; CHECK-NEXT: # %bb.1: # %if.end -; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: movslq {{.*}}(%rip), %rax ; CHECK-NEXT: cmpq %rdi, %rax ; CHECK-NEXT: je .LBB1_3 @@ -86,6 +85,7 @@ ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB1_3: # %if.then2 ; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: callq callVoid ; CHECK-NEXT: movq %rbx, %rdi ; CHECK-NEXT: popq %rbx Index: llvm/test/CodeGen/X86/fp128-cast.ll =================================================================== --- llvm/test/CodeGen/X86/fp128-cast.ll +++ llvm/test/CodeGen/X86/fp128-cast.ll @@ -877,18 +877,18 @@ ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi ; X32-NEXT: subl $36, %esp -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: cmpl $50001, {{[0-9]+}}(%esp) # imm = 0xC351 ; X32-NEXT: jl .LBB17_4 ; X32-NEXT: # %bb.1: # %if.then -; X32-NEXT: pushl %eax ; X32-NEXT: pushl %ecx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %edx +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi ; X32-NEXT: calll __trunctfdf2 ; X32-NEXT: addl $16, %esp ; X32-NEXT: fstpl {{[0-9]+}}(%esp) @@ -907,16 +907,16 @@ ; X32-NEXT: fstpl {{[0-9]+}}(%esp) ; X32-NEXT: calll __extenddftf2 ; X32-NEXT: addl $12, %esp -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl 
{{[0-9]+}}(%esp), %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: .LBB17_4: # %cleanup -; X32-NEXT: movl %edx, (%esi) -; X32-NEXT: movl %edi, 4(%esi) -; X32-NEXT: movl %ecx, 8(%esi) -; X32-NEXT: movl %eax, 12(%esi) -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %esi, (%eax) +; X32-NEXT: movl %edi, 4(%eax) +; X32-NEXT: movl %edx, 8(%eax) +; X32-NEXT: movl %ecx, 12(%eax) ; X32-NEXT: addl $36, %esp ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi Index: llvm/test/CodeGen/X86/peep-test-4.ll =================================================================== --- llvm/test/CodeGen/X86/peep-test-4.ll +++ llvm/test/CodeGen/X86/peep-test-4.ll @@ -263,15 +263,17 @@ define void @testCTZ3(i32 %v) nounwind { ; CHECK-LABEL: testCTZ3: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: tzcntl %edi, %ebx -; CHECK-NEXT: jae .LBB13_2 -; CHECK-NEXT: # %bb.1: # %bb -; CHECK-NEXT: movl %ebx, %edi +; CHECK-NEXT: tzcntl %edi, %edi +; CHECK-NEXT: jb .LBB13_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: jmp foo32 # TAILCALL +; CHECK-NEXT: .LBB13_1: # %bb +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload ; CHECK-NEXT: callq foo -; CHECK-NEXT: .LBB13_2: # %return -; CHECK-NEXT: movl %ebx, %edi -; CHECK-NEXT: popq %rbx +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: jmp foo32 # TAILCALL %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true) %cmp = icmp ne i32 %v, 0 Index: llvm/test/CodeGen/X86/ragreedy-bug.ll =================================================================== --- llvm/test/CodeGen/X86/ragreedy-bug.ll +++ llvm/test/CodeGen/X86/ragreedy-bug.ll @@ -1,37 +1,11 @@ -; RUN: llc < %s -mtriple=x86_64-apple-macosx -regalloc=greedy | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-apple-macosx 
-regalloc=greedy | FileCheck %s ; This testing case is reduced from 197.parser prune_match function. ; We make sure register copies are not generated on isupper.exit blocks. ; isupper.exit and isupper.exit223 get tail-duplicated into all their ; predecessors. -; CHECK: cond.true.i.i -; CHECK-NEXT: in Loop -; Mem-move -; CHECK-NEXT: movl -; CHECK-NEXT: andl -; CHECK-NEXT: testl -; CHECK-NEXT: jne -; CHECK: cond.true.i.i217 -; CHECK-NEXT: in Loop -; Mem-move -; CHECK-NEXT: movl -; CHECK-NEXT: andl -; CHECK-NEXT: testl -; CHECK-NEXT: je -; CHECK: cond.false.i.i -; CHECK: maskrune -; CHECK-NEXT: movzbl -; CHECK-NEXT: movzbl -; CHECK-NEXT: testl -; CHECK-NEXT: je -; CHECK: cond.false.i.i219 -; CHECK: maskrune -; CHECK-NEXT: movzbl -; CHECK-NEXT: movzbl -; CHECK-NEXT: testl -; CHECK-NEXT: jne - %struct.List_o_links_struct = type { i32, i32, i32, %struct.List_o_links_struct* } %struct.Connector_struct = type { i16, i16, i8, i8, %struct.Connector_struct*, i8* } %struct._RuneLocale = type { [8 x i8], [32 x i8], i32 (i8*, i64, i8**)*, i32 (i32, i8*, i64, i8**)*, i32, [256 x i32], [256 x i32], [256 x i32], %struct._RuneRange, %struct._RuneRange, %struct._RuneRange, i8*, i32, i32, %struct._RuneCharClass* } @@ -46,6 +20,245 @@ @_DefaultRuneLocale = external global %struct._RuneLocale declare i32 @__maskrune(i32, i64) #7 define fastcc i32 @prune_match(%struct.Connector_struct* nocapture readonly %a, %struct.Connector_struct* nocapture readonly %b) #9 { +; CHECK-LABEL: prune_match: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movzwl (%rdi), %eax +; CHECK-NEXT: cmpw (%rsi), %ax +; CHECK-NEXT: jne LBB0_46 +; CHECK-NEXT: ## %bb.1: ## %if.end +; CHECK-NEXT: movb 4(%rdi), %r9b +; CHECK-NEXT: movb 4(%rsi), %r10b +; CHECK-NEXT: movq 16(%rdi), %r11 +; CHECK-NEXT: movq 16(%rsi), %r8 +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: movq __DefaultRuneLocale@{{.*}}(%rip), %rsi +; CHECK-NEXT: movl $32768, %ecx ## imm = 0x8000 +; 
CHECK-NEXT: jmp LBB0_3 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_2: ## %if.end17 +; CHECK-NEXT: ## in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: incq %rdx +; CHECK-NEXT: LBB0_3: ## %while.cond +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movsbq (%r11,%rdx), %rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: js LBB0_8 +; CHECK-NEXT: ## %bb.4: ## %cond.true.i.i +; CHECK-NEXT: ## in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: movl 60(%rsi,%rax,4), %eax +; CHECK-NEXT: andl %ecx, %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: jne LBB0_7 +; CHECK-NEXT: LBB0_5: ## %lor.rhs +; CHECK-NEXT: ## in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: movsbq (%rdx,%r8), %rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: js LBB0_9 +; CHECK-NEXT: ## %bb.6: ## %cond.true.i.i217 +; CHECK-NEXT: ## in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: movl 60(%rsi,%rax,4), %eax +; CHECK-NEXT: andl %ecx, %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: je LBB0_10 +; CHECK-NEXT: LBB0_7: ## %while.body +; CHECK-NEXT: ## in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: movzbl (%rdx,%r11), %eax +; CHECK-NEXT: cmpb (%rdx,%r8), %al +; CHECK-NEXT: je LBB0_2 +; CHECK-NEXT: jmp LBB0_46 +; CHECK-NEXT: LBB0_8: ## %cond.false.i.i +; CHECK-NEXT: ## in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: movl $32768, %esi ## imm = 0x8000 +; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill +; CHECK-NEXT: movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill +; CHECK-NEXT: callq ___maskrune +; CHECK-NEXT: movl $32768, %ecx ## imm = 0x8000 +; CHECK-NEXT: movq __DefaultRuneLocale@{{.*}}(%rip), %rsi +; CHECK-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r10d ## 1-byte Folded Reload +; CHECK-NEXT: 
movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r9d ## 1-byte Folded Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 ## 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: je LBB0_5 +; CHECK-NEXT: jmp LBB0_7 +; CHECK-NEXT: LBB0_9: ## %cond.false.i.i219 +; CHECK-NEXT: ## in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: movl $32768, %esi ## imm = 0x8000 +; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill +; CHECK-NEXT: movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill +; CHECK-NEXT: callq ___maskrune +; CHECK-NEXT: movl $32768, %ecx ## imm = 0x8000 +; CHECK-NEXT: movq __DefaultRuneLocale@{{.*}}(%rip), %rsi +; CHECK-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r10d ## 1-byte Folded Reload +; CHECK-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %r9d ## 1-byte Folded Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 ## 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: jne LBB0_7 +; CHECK-NEXT: LBB0_10: ## %while.end +; CHECK-NEXT: movl %r10d, %eax +; CHECK-NEXT: orb %r9b, %al +; CHECK-NEXT: jne LBB0_26 +; CHECK-NEXT: ## %bb.11: ## %if.then23 +; CHECK-NEXT: movq 16(%rdi), %rcx +; CHECK-NEXT: cmpb $83, (%rcx) +; CHECK-NEXT: movb (%r11,%rdx), %sil +; CHECK-NEXT: je LBB0_21 +; CHECK-NEXT: LBB0_12: ## %while.cond59.preheader +; CHECK-NEXT: movl $1, %eax +; 
CHECK-NEXT: LBB0_13: ## %while.cond59.preheader +; CHECK-NEXT: testb %sil, %sil +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: LBB0_14: ## %land.rhs +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movzbl (%r8,%rdx), %ecx +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: ## %bb.15: ## %while.body66 +; CHECK-NEXT: ## in Loop: Header=BB0_14 Depth=1 +; CHECK-NEXT: cmpb $42, %sil +; CHECK-NEXT: je LBB0_19 +; CHECK-NEXT: ## %bb.16: ## %while.body66 +; CHECK-NEXT: ## in Loop: Header=BB0_14 Depth=1 +; CHECK-NEXT: cmpb $42, %cl +; CHECK-NEXT: je LBB0_19 +; CHECK-NEXT: ## %bb.17: ## %lor.lhs.false74 +; CHECK-NEXT: ## in Loop: Header=BB0_14 Depth=1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpb %cl, %sil +; CHECK-NEXT: jne LBB0_25 +; CHECK-NEXT: ## %bb.18: ## %lor.lhs.false74 +; CHECK-NEXT: ## in Loop: Header=BB0_14 Depth=1 +; CHECK-NEXT: cmpb $94, %sil +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: LBB0_19: ## %if.then83 +; CHECK-NEXT: ## in Loop: Header=BB0_14 Depth=1 +; CHECK-NEXT: movzbl 1(%r11,%rdx), %esi +; CHECK-NEXT: incq %rdx +; CHECK-NEXT: testb %sil, %sil +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: jne LBB0_14 +; CHECK-NEXT: jmp LBB0_25 +; CHECK-NEXT: LBB0_21: ## %land.lhs.true28 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: testb %sil, %sil +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: ## %bb.22: ## %land.lhs.true28 +; CHECK-NEXT: cmpb $112, %sil +; CHECK-NEXT: jne LBB0_14 +; CHECK-NEXT: ## %bb.23: ## %land.lhs.true35 +; CHECK-NEXT: cmpb $112, (%r8,%rdx) +; CHECK-NEXT: jne LBB0_14 +; CHECK-NEXT: ## %bb.24: ## %land.lhs.true43 +; CHECK-NEXT: movq %r11, %rdi +; CHECK-NEXT: subq %rcx, %rdi +; CHECK-NEXT: addq %rdx, %rdi +; CHECK-NEXT: cmpq $1, %rdi +; CHECK-NEXT: jne LBB0_44 +; CHECK-NEXT: LBB0_25: ## %return +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: retq +; CHECK-NEXT: LBB0_26: ## %if.else88 +; CHECK-NEXT: cmpb $1, %r9b +; CHECK-NEXT: jne LBB0_35 +; CHECK-NEXT: ## %bb.27: ## %if.else88 +; CHECK-NEXT: cmpb $2, 
%r10b +; CHECK-NEXT: jne LBB0_35 +; CHECK-NEXT: ## %bb.28: ## %while.cond95.preheader +; CHECK-NEXT: movb (%r11,%rdx), %sil +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: testb %sil, %sil +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: LBB0_30: ## %land.rhs99 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movzbl (%r8,%rdx), %ecx +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: ## %bb.31: ## %while.body104 +; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=1 +; CHECK-NEXT: cmpb %cl, %sil +; CHECK-NEXT: je LBB0_29 +; CHECK-NEXT: ## %bb.32: ## %while.body104 +; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=1 +; CHECK-NEXT: cmpb $42, %sil +; CHECK-NEXT: je LBB0_29 +; CHECK-NEXT: ## %bb.33: ## %while.body104 +; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=1 +; CHECK-NEXT: cmpb $94, %cl +; CHECK-NEXT: jne LBB0_46 +; CHECK-NEXT: LBB0_29: ## %if.then117 +; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=1 +; CHECK-NEXT: movzbl 1(%r11,%rdx), %esi +; CHECK-NEXT: incq %rdx +; CHECK-NEXT: testb %sil, %sil +; CHECK-NEXT: jne LBB0_30 +; CHECK-NEXT: jmp LBB0_25 +; CHECK-NEXT: LBB0_46: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: retq +; CHECK-NEXT: LBB0_35: ## %if.else123 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpb $1, %r10b +; CHECK-NEXT: jne LBB0_25 +; CHECK-NEXT: ## %bb.36: ## %if.else123 +; CHECK-NEXT: cmpb $2, %r9b +; CHECK-NEXT: jne LBB0_25 +; CHECK-NEXT: ## %bb.37: ## %while.cond130.preheader +; CHECK-NEXT: movb (%r11,%rdx), %sil +; CHECK-NEXT: testb %sil, %sil +; CHECK-NEXT: je LBB0_43 +; CHECK-NEXT: LBB0_39: ## %land.rhs134 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movzbl (%r8,%rdx), %ecx +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: je LBB0_43 +; CHECK-NEXT: ## %bb.40: ## %while.body139 +; CHECK-NEXT: ## in Loop: Header=BB0_39 Depth=1 +; CHECK-NEXT: cmpb %cl, %sil +; CHECK-NEXT: je LBB0_38 +; CHECK-NEXT: ## %bb.41: ## %while.body139 +; CHECK-NEXT: ## in Loop: Header=BB0_39 Depth=1 +; 
CHECK-NEXT: cmpb $42, %cl +; CHECK-NEXT: je LBB0_38 +; CHECK-NEXT: ## %bb.42: ## %while.body139 +; CHECK-NEXT: ## in Loop: Header=BB0_39 Depth=1 +; CHECK-NEXT: cmpb $94, %sil +; CHECK-NEXT: jne LBB0_25 +; CHECK-NEXT: LBB0_38: ## %if.then152 +; CHECK-NEXT: ## in Loop: Header=BB0_39 Depth=1 +; CHECK-NEXT: movzbl 1(%r11,%rdx), %esi +; CHECK-NEXT: incq %rdx +; CHECK-NEXT: testb %sil, %sil +; CHECK-NEXT: jne LBB0_39 +; CHECK-NEXT: LBB0_43: +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: retq +; CHECK-NEXT: LBB0_44: ## %lor.lhs.false47 +; CHECK-NEXT: cmpq $2, %rdi +; CHECK-NEXT: jne LBB0_12 +; CHECK-NEXT: ## %bb.45: ## %land.lhs.true52 +; CHECK-NEXT: cmpb $73, -1(%r11,%rdx) +; CHECK-NEXT: jne LBB0_13 +; CHECK-NEXT: jmp LBB0_25 entry: %label56 = bitcast %struct.Connector_struct* %a to i16* %0 = load i16, i16* %label56, align 2 Index: llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll =================================================================== --- llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +++ llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -68,7 +68,7 @@ ; CHECK-NEXT: je LBB0_55 ; CHECK-NEXT: ## %bb.6: ## %SyTime.exit2720 ; CHECK-NEXT: movq %rdx, %rbx -; CHECK-NEXT: movq %rdi, %rbp +; CHECK-NEXT: movq %rdi, %r14 ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: cmpq %rax, %rcx @@ -78,11 +78,10 @@ ; CHECK-NEXT: movl $32, %esi ; CHECK-NEXT: callq _memset ; CHECK-NEXT: LBB0_8: ## %while.body.preheader -; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: imulq $1040, %rbx, %rax ## imm = 0x410 ; CHECK-NEXT: movq _syBuf@{{.*}}(%rip), %rcx ; CHECK-NEXT: leaq 8(%rcx,%rax), %rbx -; CHECK-NEXT: movl $1, %r15d +; CHECK-NEXT: movl $1, %ebp ; CHECK-NEXT: movq _syCTRO@{{.*}}(%rip), %rax ; CHECK-NEXT: movb $1, %cl ; CHECK-NEXT: .p2align 4, 0x90 @@ -92,47 +91,48 @@ ; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: jne LBB0_9 ; CHECK-NEXT: ## %bb.10: ## %do.end -; CHECK-NEXT: xorl %r14d, 
%r14d -; CHECK-NEXT: testb %r14b, %r14b +; CHECK-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: xorl %r13d, %r13d +; CHECK-NEXT: testb %r13b, %r13b ; CHECK-NEXT: jne LBB0_11 ; CHECK-NEXT: ## %bb.12: ## %while.body200.preheader -; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: xorl %r12d, %r12d +; CHECK-NEXT: leaq {{.*}}(%rip), %rdx ; CHECK-NEXT: leaq {{.*}}(%rip), %rsi -; CHECK-NEXT: leaq {{.*}}(%rip), %rdi -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: xorl %r13d, %r13d +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: xorl %r14d, %r14d ; CHECK-NEXT: jmp LBB0_13 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_14: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leal 1(%r14), %eax +; CHECK-NEXT: leal 1(%r13), %eax ; CHECK-NEXT: cmpl $21, %eax ; CHECK-NEXT: ja LBB0_20 ; CHECK-NEXT: ## %bb.15: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $-1, %r13d -; CHECK-NEXT: movslq (%rsi,%rax,4), %rax -; CHECK-NEXT: addq %rsi, %rax +; CHECK-NEXT: movl $-1, %r14d +; CHECK-NEXT: movslq (%rdx,%rax,4), %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $1, %r13d +; CHECK-NEXT: movl $1, %r14d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: movl %r14d, %r13d +; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: movl %r13d, %r14d ; CHECK-NEXT: jne LBB0_21 ; CHECK-NEXT: jmp LBB0_55 ; CHECK-NEXT: LBB0_26: ## %sw.bb474 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: ## implicit-def: $r12 +; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: ## implicit-def: $r15 ; CHECK-NEXT: jne LBB0_34 ; CHECK-NEXT: ## %bb.27: ## %do.body479.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 
Depth=1 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: ## implicit-def: $r12 +; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: ## implicit-def: $r15 ; CHECK-NEXT: jne LBB0_34 ; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 @@ -145,27 +145,26 @@ ; CHECK-NEXT: js LBB0_55 ; CHECK-NEXT: ## %bb.30: ## %cond.true.i.i2780 ; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 -; CHECK-NEXT: movq %rax, %r12 -; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: movq %rax, %r15 +; CHECK-NEXT: testb %r12b, %r12b ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: ## %bb.31: ## %lor.rhs500 ; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 ; CHECK-NEXT: movl $256, %esi ## imm = 0x100 ; CHECK-NEXT: callq ___maskrune -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: testb %r12b, %r12b ; CHECK-NEXT: je LBB0_34 ; CHECK-NEXT: LBB0_32: ## %do.body479.backedge ; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 -; CHECK-NEXT: leaq 1(%r12), %rax -; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: leaq 1(%r15), %rax +; CHECK-NEXT: testb %r12b, %r12b ; CHECK-NEXT: jne LBB0_29 ; CHECK-NEXT: ## %bb.33: ## %if.end517.loopexitsplit ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: incq %r12 +; CHECK-NEXT: incq %r15 ; CHECK-NEXT: LBB0_34: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leal -324(%r13), %eax +; CHECK-NEXT: leal -324(%r14), %eax ; CHECK-NEXT: cmpl $59, %eax ; CHECK-NEXT: ja LBB0_35 ; CHECK-NEXT: ## %bb.57: ## %if.end517 @@ -175,11 +174,11 @@ ; CHECK-NEXT: jb LBB0_38 ; CHECK-NEXT: LBB0_35: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $11, %r13d +; CHECK-NEXT: cmpl $11, %r14d ; CHECK-NEXT: je LBB0_38 ; CHECK-NEXT: ## %bb.36: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $24, %r13d +; CHECK-NEXT: cmpl $24, %r14d ; CHECK-NEXT: je LBB0_38 ; CHECK-NEXT: ## %bb.37: ## %if.then532 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 @@ -189,15 
+188,15 @@ ; CHECK-NEXT: LBB0_38: ## %for.cond534 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 -; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: testb %r12b, %r12b ; CHECK-NEXT: jne LBB0_38 ; CHECK-NEXT: ## %bb.39: ## %for.cond542.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: movb $0, (%r12) -; CHECK-NEXT: movl %r14d, %r13d +; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: movb $0, (%r15) +; CHECK-NEXT: movl %r13d, %r14d +; CHECK-NEXT: leaq {{.*}}(%rip), %rdx ; CHECK-NEXT: leaq {{.*}}(%rip), %rsi -; CHECK-NEXT: leaq {{.*}}(%rip), %rdi ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_45: ## %sw.bb1134 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 @@ -206,38 +205,38 @@ ; CHECK-NEXT: cmpq %rax, %rcx ; CHECK-NEXT: jb LBB0_55 ; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: movl $268, %r13d ## imm = 0x10C +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: movl $268, %r14d ## imm = 0x10C ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_19: ## %sw.bb243 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $2, %r13d +; CHECK-NEXT: movl $2, %r14d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_40: ## %sw.bb566 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $20, %r13d +; CHECK-NEXT: movl $20, %r14d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_13: ## %while.body200 ; CHECK-NEXT: ## =>This Loop Header: Depth=1 ; CHECK-NEXT: ## Child Loop BB0_29 Depth 2 ; CHECK-NEXT: ## Child Loop BB0_38 Depth 2 -; CHECK-NEXT: leal -268(%r14), %eax +; CHECK-NEXT: leal -268(%r13), %eax ; CHECK-NEXT: cmpl $105, %eax ; CHECK-NEXT: ja LBB0_14 ; CHECK-NEXT: ## %bb.56: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movslq (%rdi,%rax,4), %rax -; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: movslq (%rsi,%rax,4), %rax +; 
CHECK-NEXT: addq %rsi, %rax ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: LBB0_20: ## %sw.bb256 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl %r14d, %r13d +; CHECK-NEXT: movl %r13d, %r14d ; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: decl %r15d -; CHECK-NEXT: testl %r15d, %r15d -; CHECK-NEXT: movl %r13d, %r14d +; CHECK-NEXT: decl %ebp +; CHECK-NEXT: testl %ebp, %ebp +; CHECK-NEXT: movl %r14d, %r13d ; CHECK-NEXT: jg LBB0_13 ; CHECK-NEXT: jmp LBB0_22 ; CHECK-NEXT: .p2align 4, 0x90 @@ -253,15 +252,15 @@ ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: jmp LBB0_25 ; CHECK-NEXT: LBB0_11: -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: xorl %r13d, %r13d +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: xorl %r14d, %r14d ; CHECK-NEXT: LBB0_22: ## %while.end1465 -; CHECK-NEXT: incl %r13d -; CHECK-NEXT: cmpl $16, %r13d +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: cmpl $16, %r14d ; CHECK-NEXT: ja LBB0_50 ; CHECK-NEXT: ## %bb.23: ## %while.end1465 ; CHECK-NEXT: movl $83969, %eax ## imm = 0x14801 -; CHECK-NEXT: btl %r13d, %eax +; CHECK-NEXT: btl %r14d, %eax ; CHECK-NEXT: jae LBB0_50 ; CHECK-NEXT: ## %bb.24: ; CHECK-NEXT: xorl %ebp, %ebp @@ -284,6 +283,7 @@ ; CHECK-NEXT: ## %bb.51: ## %for.body1664.lr.ph ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ebp ## 4-byte Reload ; CHECK-NEXT: jne LBB0_54 ; CHECK-NEXT: ## %bb.52: ## %while.body1679.preheader ; CHECK-NEXT: incl %ebp Index: llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll =================================================================== --- llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll +++ llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=x86 -regalloc=greedy --debug-only=regalloc 2>&1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-- < %s -march=x86 -regalloc=greedy | FileCheck %s ; REQUIRES: asserts @@ -14,19 +15,57 @@ ; The new code choses %ebp as the split candidate as it has lower spill cost. ; Make sure the split behaves as expected -; CHECK: RS_Split Cascade 1 -; CHECK-NOT: $eax static = -; CHECK: $eax no positive bundles -; CHECK-NEXT: $ecx no positive bundles -; CHECK-NEXT: $edx no positive bundles -; CHECK-NEXT: $esi static = -; CHECK-NEXT: $edi no positive bundles -; CHECK-NEXT: $ebx no positive bundles -; CHECK-NEXT: $ebp static = -; CHECK: Split for $ebp - ; Function Attrs: nounwind define i32 @foo(i32* %array, i32 %cond1, i32 %val) local_unnamed_addr #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl (%esi), %ebp +; CHECK-NEXT: movl 4(%esi), %ebx +; CHECK-NEXT: movl 8(%esi), %ecx +; CHECK-NEXT: movl 12(%esi), %edx +; CHECK-NEXT: movl 16(%esi), %edi +; CHECK-NEXT: movl (%esi,%eax,4), %eax +; CHECK-NEXT: shll $5, %eax +; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl %eax, 24(%esi) +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: leal 28(%ecx), %edx +; CHECK-NEXT: jmp .LBB0_3 +; CHECK-NEXT: .LBB0_2: # %if.else +; CHECK-NEXT: movl %ebp, (%esp) # 4-byte Spill +; CHECK-NEXT: movl 20(%esi), %ebp +; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %eax, 32(%esi) +; CHECK-NEXT: movl %eax, %ebp +; CHECK-NEXT: movl (%esp), %eax # 4-byte Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: leal 36(%ecx), %edx +; CHECK-NEXT: .LBB0_3: # %if.end +; CHECK-NEXT: movl %eax, (%edx) +; CHECK-NEXT: addl (%ecx), %eax +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl entry: %array.addr = alloca i32*, align 4 store i32* %array, i32** %array.addr, align 4, !tbaa !3 Index: llvm/test/CodeGen/X86/sjlj-eh.ll =================================================================== --- llvm/test/CodeGen/X86/sjlj-eh.ll +++ llvm/test/CodeGen/X86/sjlj-eh.ll @@ -62,7 +62,6 @@ ; CHECK: calll __Unwind_SjLj_Unregister ; ; CHECK: [[RESUME]]: -; CHECK: leal -64(%ebp), %esi ; assert(UFC.__callsite < 1); ; CHECK: movl -60(%ebp), %eax ; CHECK: cmpl $1, %eax Index: llvm/test/CodeGen/X86/speculative-load-hardening.ll =================================================================== --- llvm/test/CodeGen/X86/speculative-load-hardening.ll +++ llvm/test/CodeGen/X86/speculative-load-hardening.ll @@ -32,86 +32,85 @@ define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %ptr2, i32** %ptr3) speculative_load_hardening { ; X64-LABEL: test_basic_conditions: ; X64: # %bb.0: # %entry -; X64-NEXT: pushq %r15 -; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: pushq %r14 -; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: pushq %rbx +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: pushq %rax ; X64-NEXT: .cfi_def_cfa_offset 32 -; X64-NEXT: .cfi_offset %rbx, -32 -; X64-NEXT: .cfi_offset %r14, -24 -; X64-NEXT: .cfi_offset %r15, -16 +; X64-NEXT: .cfi_offset %rbx, -24 +; X64-NEXT: .cfi_offset %r14, -16 ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %rbx +; X64-NEXT: movq $-1, %r10 ; X64-NEXT: sarq $63, %rax ; X64-NEXT: testl %edi, %edi ; X64-NEXT: jne .LBB1_1 ; X64-NEXT: # %bb.2: # %then1 -; X64-NEXT: cmovneq %rbx, %rax +; X64-NEXT: cmovneq %r10, %rax ; X64-NEXT: testl %esi, %esi ; X64-NEXT: je .LBB1_4 ; X64-NEXT: 
.LBB1_1: -; X64-NEXT: cmoveq %rbx, %rax +; X64-NEXT: cmoveq %r10, %rax ; X64-NEXT: .LBB1_8: # %exit ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp -; X64-NEXT: popq %rbx +; X64-NEXT: addq $8, %rsp ; X64-NEXT: .cfi_def_cfa_offset 24 -; X64-NEXT: popq %r14 +; X64-NEXT: popq %rbx ; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: popq %r15 +; X64-NEXT: popq %r14 ; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq ; X64-NEXT: .LBB1_4: # %then2 ; X64-NEXT: .cfi_def_cfa_offset 32 -; X64-NEXT: movq %r8, %r14 -; X64-NEXT: cmovneq %rbx, %rax +; X64-NEXT: cmovneq %r10, %rax ; X64-NEXT: testl %edx, %edx ; X64-NEXT: je .LBB1_6 ; X64-NEXT: # %bb.5: # %else3 -; X64-NEXT: cmoveq %rbx, %rax -; X64-NEXT: movslq (%r9), %rcx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: leaq (%r14,%rcx,4), %r15 -; X64-NEXT: movl %ecx, (%r14,%rcx,4) +; X64-NEXT: cmoveq %r10, %rax +; X64-NEXT: movslq (%r9), %rdx +; X64-NEXT: orq %rax, %rdx +; X64-NEXT: leaq (%r8,%rdx,4), %rcx +; X64-NEXT: movl %edx, (%r8,%rdx,4) ; X64-NEXT: jmp .LBB1_7 ; X64-NEXT: .LBB1_6: # %then3 -; X64-NEXT: cmovneq %rbx, %rax +; X64-NEXT: cmovneq %r10, %rax ; X64-NEXT: movl (%rcx), %ecx -; X64-NEXT: addl (%r14), %ecx +; X64-NEXT: addl (%r8), %ecx ; X64-NEXT: movslq %ecx, %rdi ; X64-NEXT: orq %rax, %rdi -; X64-NEXT: movl (%r14,%rdi,4), %esi +; X64-NEXT: movl (%r8,%rdi,4), %esi ; X64-NEXT: orl %eax, %esi -; X64-NEXT: movq (%r9), %r15 -; X64-NEXT: orq %rax, %r15 -; X64-NEXT: addl (%r15), %esi +; X64-NEXT: movq (%r9), %rbx +; X64-NEXT: orq %rax, %rbx +; X64-NEXT: addl (%rbx), %esi ; X64-NEXT: shlq $47, %rax ; X64-NEXT: # kill: def $edi killed $edi killed $rdi ; X64-NEXT: orq %rax, %rsp +; X64-NEXT: movq %r8, (%rsp) # 8-byte Spill +; X64-NEXT: movq $-1, %r14 ; X64-NEXT: callq leak ; X64-NEXT: .Lslh_ret_addr0: +; X64-NEXT: movq %rbx, %rcx +; X64-NEXT: movq (%rsp), %r8 # 8-byte Reload ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; X64-NEXT: sarq $63, %rax -; X64-NEXT: cmpq 
$.Lslh_ret_addr0, %rcx -; X64-NEXT: cmovneq %rbx, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr0, %rdx +; X64-NEXT: cmovneq %r14, %rax ; X64-NEXT: .LBB1_7: # %merge -; X64-NEXT: movslq (%r15), %rcx +; X64-NEXT: movslq (%rcx), %rcx ; X64-NEXT: orq %rax, %rcx -; X64-NEXT: movl $0, (%r14,%rcx,4) +; X64-NEXT: movl $0, (%r8,%rcx,4) ; X64-NEXT: jmp .LBB1_8 ; ; X64-LFENCE-LABEL: test_basic_conditions: ; X64-LFENCE: # %bb.0: # %entry -; X64-LFENCE-NEXT: pushq %r14 -; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16 ; X64-LFENCE-NEXT: pushq %rbx -; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24 -; X64-LFENCE-NEXT: pushq %rax +; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16 +; X64-LFENCE-NEXT: subq $16, %rsp ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 32 -; X64-LFENCE-NEXT: .cfi_offset %rbx, -24 -; X64-LFENCE-NEXT: .cfi_offset %r14, -16 +; X64-LFENCE-NEXT: .cfi_offset %rbx, -16 ; X64-LFENCE-NEXT: testl %edi, %edi ; X64-LFENCE-NEXT: jne .LBB1_6 ; X64-LFENCE-NEXT: # %bb.1: # %then1 @@ -119,36 +118,36 @@ ; X64-LFENCE-NEXT: testl %esi, %esi ; X64-LFENCE-NEXT: jne .LBB1_6 ; X64-LFENCE-NEXT: # %bb.2: # %then2 -; X64-LFENCE-NEXT: movq %r8, %rbx ; X64-LFENCE-NEXT: lfence ; X64-LFENCE-NEXT: testl %edx, %edx ; X64-LFENCE-NEXT: je .LBB1_3 ; X64-LFENCE-NEXT: # %bb.4: # %else3 ; X64-LFENCE-NEXT: lfence -; X64-LFENCE-NEXT: movslq (%r9), %rax -; X64-LFENCE-NEXT: leaq (%rbx,%rax,4), %r14 -; X64-LFENCE-NEXT: movl %eax, (%rbx,%rax,4) +; X64-LFENCE-NEXT: movslq (%r9), %rcx +; X64-LFENCE-NEXT: leaq (%r8,%rcx,4), %rax +; X64-LFENCE-NEXT: movl %ecx, (%r8,%rcx,4) ; X64-LFENCE-NEXT: jmp .LBB1_5 ; X64-LFENCE-NEXT: .LBB1_3: # %then3 ; X64-LFENCE-NEXT: lfence ; X64-LFENCE-NEXT: movl (%rcx), %eax -; X64-LFENCE-NEXT: addl (%rbx), %eax +; X64-LFENCE-NEXT: addl (%r8), %eax ; X64-LFENCE-NEXT: movslq %eax, %rdi -; X64-LFENCE-NEXT: movl (%rbx,%rdi,4), %esi -; X64-LFENCE-NEXT: movq (%r9), %r14 -; X64-LFENCE-NEXT: addl (%r14), %esi +; X64-LFENCE-NEXT: movl (%r8,%rdi,4), %esi +; X64-LFENCE-NEXT: movq (%r9), %rbx +; X64-LFENCE-NEXT: addl (%rbx), 
%esi ; X64-LFENCE-NEXT: # kill: def $edi killed $edi killed $rdi +; X64-LFENCE-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-LFENCE-NEXT: callq leak +; X64-LFENCE-NEXT: movq %rbx, %rax +; X64-LFENCE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload ; X64-LFENCE-NEXT: .LBB1_5: # %merge -; X64-LFENCE-NEXT: movslq (%r14), %rax -; X64-LFENCE-NEXT: movl $0, (%rbx,%rax,4) +; X64-LFENCE-NEXT: movslq (%rax), %rax +; X64-LFENCE-NEXT: movl $0, (%r8,%rax,4) ; X64-LFENCE-NEXT: .LBB1_6: # %exit ; X64-LFENCE-NEXT: lfence -; X64-LFENCE-NEXT: addq $8, %rsp -; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24 -; X64-LFENCE-NEXT: popq %rbx +; X64-LFENCE-NEXT: addq $16, %rsp ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16 -; X64-LFENCE-NEXT: popq %r14 +; X64-LFENCE-NEXT: popq %rbx ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 8 ; X64-LFENCE-NEXT: retq entry: @@ -502,28 +501,22 @@ ; X64: # %bb.0: # %entry ; X64-NEXT: pushq %rbp ; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: pushq %r15 -; X64-NEXT: .cfi_def_cfa_offset 24 -; X64-NEXT: pushq %r14 -; X64-NEXT: .cfi_def_cfa_offset 32 ; X64-NEXT: pushq %rbx -; X64-NEXT: .cfi_def_cfa_offset 40 -; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: subq $24, %rsp ; X64-NEXT: .cfi_def_cfa_offset 48 -; X64-NEXT: .cfi_offset %rbx, -40 -; X64-NEXT: .cfi_offset %r14, -32 -; X64-NEXT: .cfi_offset %r15, -24 +; X64-NEXT: .cfi_offset %rbx, -24 ; X64-NEXT: .cfi_offset %rbp, -16 ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %r15 +; X64-NEXT: movq $-1, %rbx ; X64-NEXT: sarq $63, %rax ; X64-NEXT: cmpl $41, %edi ; X64-NEXT: jg .LBB4_1 ; X64-NEXT: # %bb.2: # %thrower -; X64-NEXT: movq %rdx, %r14 -; X64-NEXT: movq %rsi, %rbx -; X64-NEXT: cmovgq %r15, %rax +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: cmovgq %rbx, %rax ; X64-NEXT: movslq %edi, %rcx +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movl (%rsi,%rcx,4), %ebp ; X64-NEXT: orl %eax, %ebp ; X64-NEXT: movl $4, %edi @@ 
-535,7 +528,7 @@ ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; X64-NEXT: sarq $63, %rcx ; X64-NEXT: cmpq $.Lslh_ret_addr4, %rdx -; X64-NEXT: cmovneq %r15, %rcx +; X64-NEXT: cmovneq %rbx, %rcx ; X64-NEXT: movl %ebp, (%rax) ; X64-NEXT: .Ltmp0: ; X64-NEXT: shlq $47, %rcx @@ -549,21 +542,17 @@ ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax ; X64-NEXT: cmpq $.Lslh_ret_addr5, %rcx -; X64-NEXT: cmovneq %r15, %rax +; X64-NEXT: cmovneq %rbx, %rax ; X64-NEXT: .Ltmp1: ; X64-NEXT: jmp .LBB4_3 ; X64-NEXT: .LBB4_1: -; X64-NEXT: cmovleq %r15, %rax +; X64-NEXT: cmovleq %rbx, %rax ; X64-NEXT: .LBB4_3: # %exit ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp -; X64-NEXT: addq $8, %rsp -; X64-NEXT: .cfi_def_cfa_offset 40 -; X64-NEXT: popq %rbx -; X64-NEXT: .cfi_def_cfa_offset 32 -; X64-NEXT: popq %r14 +; X64-NEXT: addq $24, %rsp ; X64-NEXT: .cfi_def_cfa_offset 24 -; X64-NEXT: popq %r15 +; X64-NEXT: popq %rbx ; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: popq %rbp ; X64-NEXT: .cfi_def_cfa_offset 8 @@ -573,11 +562,13 @@ ; X64-NEXT: .Ltmp2: ; X64-NEXT: movq %rsp, %rcx ; X64-NEXT: sarq $63, %rcx -; X64-NEXT: movl (%rax), %eax -; X64-NEXT: addl (%rbx), %eax -; X64-NEXT: cltq -; X64-NEXT: orq %rcx, %rax -; X64-NEXT: movl (%r14,%rax,4), %edi +; X64-NEXT: movl (%rax), %edx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NEXT: addl (%rax), %edx +; X64-NEXT: movslq %edx, %rdx +; X64-NEXT: orq %rcx, %rdx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NEXT: movl (%rax,%rdx,4), %edi ; X64-NEXT: orl %ecx, %edi ; X64-NEXT: shlq $47, %rcx ; X64-NEXT: orq %rcx, %rsp @@ -587,7 +578,7 @@ ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax ; X64-NEXT: cmpq $.Lslh_ret_addr6, %rcx -; X64-NEXT: cmovneq %r15, %rax +; X64-NEXT: cmovneq %rbx, %rax ; ; X64-LFENCE-LABEL: test_basic_eh: ; X64-LFENCE: # %bb.0: # %entry Index: llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll 
=================================================================== --- llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll +++ llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll @@ -87,24 +87,23 @@ ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: pushq %r15 ; CHECK-NEXT: pushq %r14 -; CHECK-NEXT: pushq %r13 ; CHECK-NEXT: pushq %r12 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: pushq %rax +; CHECK-NEXT: subq $16, %rsp ; CHECK-NEXT: movl $1, %ebx ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB1_26 ; CHECK-NEXT: # %bb.1: # %if.end19 -; CHECK-NEXT: movl %esi, %r13d -; CHECK-NEXT: movq %rdi, %r12 +; CHECK-NEXT: movl %esi, %r15d +; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: movl (%rax), %ebp -; CHECK-NEXT: leal (,%rbp,4), %r14d -; CHECK-NEXT: movl %r14d, %r15d +; CHECK-NEXT: leal (,%rbp,4), %r12d +; CHECK-NEXT: movl %r12d, %r14d ; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: movq %r15, %rdi +; CHECK-NEXT: movq %r14, %rdi ; CHECK-NEXT: callq cli_calloc -; CHECK-NEXT: testl %r13d, %r13d +; CHECK-NEXT: testl %r15d, %r15d ; CHECK-NEXT: je .LBB1_25 ; CHECK-NEXT: # %bb.2: # %if.end19 ; CHECK-NEXT: testl %ebp, %ebp @@ -115,13 +114,13 @@ ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB1_25 ; CHECK-NEXT: # %bb.4: # %if.end19 -; CHECK-NEXT: cmpq %r12, %rbx +; CHECK-NEXT: cmpq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload ; CHECK-NEXT: jb .LBB1_25 ; CHECK-NEXT: # %bb.5: # %if.end50 ; CHECK-NEXT: movq %rbx, %rdi -; CHECK-NEXT: movq %r15, %rdx +; CHECK-NEXT: movq %r14, %rdx ; CHECK-NEXT: callq memcpy -; CHECK-NEXT: cmpl $4, %r14d +; CHECK-NEXT: cmpl $4, %r12d ; CHECK-NEXT: jb .LBB1_28 ; CHECK-NEXT: # %bb.6: # %shared_preheader ; CHECK-NEXT: movb $32, %dl @@ -201,10 +200,9 @@ ; CHECK-NEXT: callq cli_dbgmsg ; CHECK-NEXT: .LBB1_26: # %cleanup ; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: addq $16, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 -; CHECK-NEXT: popq %r13 ; CHECK-NEXT: popq %r14 
; CHECK-NEXT: popq %r15 ; CHECK-NEXT: popq %rbp Index: llvm/test/CodeGen/X86/tail-opts.ll =================================================================== --- llvm/test/CodeGen/X86/tail-opts.ll +++ llvm/test/CodeGen/X86/tail-opts.ll @@ -239,78 +239,80 @@ define fastcc void @c_expand_expr_stmt(%union.tree_node* %expr) nounwind { ; CHECK-LABEL: c_expand_expr_stmt: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: pushq %rax ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB3_17 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: movb 0, %bl -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: movb 0, %al +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: jne .LBB3_16 ; CHECK-NEXT: # %bb.2: # %bb.i -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: je .LBB3_16 ; CHECK-NEXT: # %bb.3: # %lvalue_p.exit -; CHECK-NEXT: movq 0, %rax -; CHECK-NEXT: movzbl (%rax), %ecx -; CHECK-NEXT: testl %ecx, %ecx +; CHECK-NEXT: movq 0, %rcx +; CHECK-NEXT: movzbl (%rcx), %edx +; CHECK-NEXT: testl %edx, %edx ; CHECK-NEXT: je .LBB3_12 ; CHECK-NEXT: # %bb.4: # %lvalue_p.exit -; CHECK-NEXT: cmpl $2, %ecx +; CHECK-NEXT: cmpl $2, %edx ; CHECK-NEXT: jne .LBB3_5 ; CHECK-NEXT: # %bb.6: # %bb.i1 -; CHECK-NEXT: movq 32(%rax), %rax -; CHECK-NEXT: movzbl 16(%rax), %ecx -; CHECK-NEXT: testl %ecx, %ecx +; CHECK-NEXT: movq 32(%rcx), %rcx +; CHECK-NEXT: movzbl 16(%rcx), %edx +; CHECK-NEXT: testl %edx, %edx ; CHECK-NEXT: je .LBB3_10 ; CHECK-NEXT: # %bb.7: # %bb.i1 -; CHECK-NEXT: cmpl $2, %ecx +; CHECK-NEXT: cmpl $2, %edx ; CHECK-NEXT: jne .LBB3_8 ; CHECK-NEXT: # %bb.9: # %bb.i.i ; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: callq lvalue_p ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: setne %al -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), 
%al # 1-byte Reload +; CHECK-NEXT: setne %cl +; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: je .LBB3_15 ; CHECK-NEXT: jmp .LBB3_17 ; CHECK-NEXT: .LBB3_16: # %bb1 -; CHECK-NEXT: cmpb $23, %bl +; CHECK-NEXT: cmpb $23, %al ; CHECK-NEXT: .LBB3_17: # %bb3 ; CHECK-NEXT: .LBB3_12: # %bb2.i3 -; CHECK-NEXT: movq 8(%rax), %rax -; CHECK-NEXT: movb 16(%rax), %cl -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpb $23, %cl +; CHECK-NEXT: movq 8(%rcx), %rcx +; CHECK-NEXT: movb 16(%rcx), %dl +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: cmpb $23, %dl ; CHECK-NEXT: je .LBB3_14 ; CHECK-NEXT: # %bb.13: # %bb2.i3 -; CHECK-NEXT: cmpb $16, %cl +; CHECK-NEXT: cmpb $16, %dl ; CHECK-NEXT: je .LBB3_14 ; CHECK-NEXT: jmp .LBB3_17 ; CHECK-NEXT: .LBB3_5: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: je .LBB3_15 ; CHECK-NEXT: jmp .LBB3_17 ; CHECK-NEXT: .LBB3_10: # %bb2.i.i2 -; CHECK-NEXT: movq 8(%rax), %rax -; CHECK-NEXT: movb 16(%rax), %cl -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpb $16, %cl +; CHECK-NEXT: movq 8(%rcx), %rcx +; CHECK-NEXT: movb 16(%rcx), %dl +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: cmpb $16, %dl ; CHECK-NEXT: je .LBB3_14 ; CHECK-NEXT: # %bb.11: # %bb2.i.i2 -; CHECK-NEXT: cmpb $23, %cl +; CHECK-NEXT: cmpb $23, %dl ; CHECK-NEXT: je .LBB3_14 ; CHECK-NEXT: jmp .LBB3_17 ; CHECK-NEXT: .LBB3_8: -; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: .LBB3_14: # %lvalue_p.exit4 -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: jne .LBB3_17 ; CHECK-NEXT: .LBB3_15: # %lvalue_p.exit4 -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: testb %al, %al entry: %tmp4 = load i8, i8* null, align 8 ; [#uses=3] switch i8 %tmp4, label %bb3 [ Index: llvm/test/CodeGen/X86/tbm_patterns.ll =================================================================== --- llvm/test/CodeGen/X86/tbm_patterns.ll +++ llvm/test/CodeGen/X86/tbm_patterns.ll @@ -875,14 +875,16 @@ define 
i32 @blcic32_branch(i32 %x) nounwind { ; CHECK-LABEL: blcic32_branch: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: blcicl %edi, %ebx -; CHECK-NEXT: jne .LBB69_2 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: blcicl %edi, %eax +; CHECK-NEXT: je .LBB69_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB69_1: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: callq bar -; CHECK-NEXT: .LBB69_2: -; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: popq %rbx +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: retq %tmp = xor i32 %x, -1 %tmp2 = add i32 %x, 1 @@ -898,14 +900,16 @@ define i64 @blcic64_branch(i64 %x) nounwind { ; CHECK-LABEL: blcic64_branch: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: blcicq %rdi, %rbx -; CHECK-NEXT: jne .LBB70_2 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: blcicq %rdi, %rax +; CHECK-NEXT: je .LBB70_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB70_1: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movq %rax, (%rsp) # 8-byte Spill ; CHECK-NEXT: callq bar -; CHECK-NEXT: .LBB70_2: -; CHECK-NEXT: movq %rbx, %rax -; CHECK-NEXT: popq %rbx +; CHECK-NEXT: movq (%rsp), %rax # 8-byte Reload +; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: retq %tmp = xor i64 %x, -1 %tmp2 = add i64 %x, 1 @@ -921,14 +925,16 @@ define i32 @tzmsk32_branch(i32 %x) nounwind { ; CHECK-LABEL: tzmsk32_branch: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: tzmskl %edi, %ebx -; CHECK-NEXT: jne .LBB71_2 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: tzmskl %edi, %eax +; CHECK-NEXT: je .LBB71_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB71_1: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: callq bar -; CHECK-NEXT: .LBB71_2: -; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: popq %rbx +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: 
addq $8, %rsp ; CHECK-NEXT: retq %tmp = xor i32 %x, -1 %tmp2 = add i32 %x, -1 @@ -944,14 +950,16 @@ define i64 @tzmsk64_branch(i64 %x) nounwind { ; CHECK-LABEL: tzmsk64_branch: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: tzmskq %rdi, %rbx -; CHECK-NEXT: jne .LBB72_2 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: tzmskq %rdi, %rax +; CHECK-NEXT: je .LBB72_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB72_1: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movq %rax, (%rsp) # 8-byte Spill ; CHECK-NEXT: callq bar -; CHECK-NEXT: .LBB72_2: -; CHECK-NEXT: movq %rbx, %rax -; CHECK-NEXT: popq %rbx +; CHECK-NEXT: movq (%rsp), %rax # 8-byte Reload +; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: retq %tmp = xor i64 %x, -1 %tmp2 = add i64 %x, -1 Index: llvm/test/CodeGen/X86/x86-shrink-wrapping.ll =================================================================== --- llvm/test/CodeGen/X86/x86-shrink-wrapping.ll +++ llvm/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -1374,9 +1374,6 @@ ; ENABLE-NEXT: .cfi_offset %rbp, -16 ; ENABLE-NEXT: movq %rsp, %rbp ; ENABLE-NEXT: .cfi_def_cfa_register %rbp -; ENABLE-NEXT: pushq %rbx -; ENABLE-NEXT: pushq %rax -; ENABLE-NEXT: .cfi_offset %rbx, -24 ; ENABLE-NEXT: movq _irreducibleCFGf@{{.*}}(%rip), %rax ; ENABLE-NEXT: cmpb $0, (%rax) ; ENABLE-NEXT: je LBB16_2 @@ -1387,26 +1384,22 @@ ; ENABLE-NEXT: LBB16_2: ## %split ; ENABLE-NEXT: movq _irreducibleCFGb@{{.*}}(%rip), %rax ; ENABLE-NEXT: cmpl $0, (%rax) -; ENABLE-NEXT: je LBB16_3 -; ENABLE-NEXT: ## %bb.4: ## %for.body4.i +; ENABLE-NEXT: je LBB16_4 +; ENABLE-NEXT: ## %bb.3: ## %for.body4.i ; ENABLE-NEXT: movq _irreducibleCFGa@{{.*}}(%rip), %rax ; ENABLE-NEXT: movl (%rax), %edi -; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: callq _something -; ENABLE-NEXT: jmp LBB16_5 -; ENABLE-NEXT: LBB16_3: -; ENABLE-NEXT: xorl %ebx, %ebx +; ENABLE-NEXT: LBB16_4: ## %for.inc +; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: .p2align 4, 0x90 ; ENABLE-NEXT: LBB16_5: ## %for.inc 
; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 -; ENABLE-NEXT: incl %ebx -; ENABLE-NEXT: cmpl $7, %ebx +; ENABLE-NEXT: incl %eax +; ENABLE-NEXT: cmpl $7, %eax ; ENABLE-NEXT: jl LBB16_5 ; ENABLE-NEXT: ## %bb.6: ## %fn1.exit ; ENABLE-NEXT: xorl %eax, %eax -; ENABLE-NEXT: addq $8, %rsp -; ENABLE-NEXT: popq %rbx ; ENABLE-NEXT: popq %rbp ; ENABLE-NEXT: retq ; @@ -1417,9 +1410,6 @@ ; DISABLE-NEXT: .cfi_offset %rbp, -16 ; DISABLE-NEXT: movq %rsp, %rbp ; DISABLE-NEXT: .cfi_def_cfa_register %rbp -; DISABLE-NEXT: pushq %rbx -; DISABLE-NEXT: pushq %rax -; DISABLE-NEXT: .cfi_offset %rbx, -24 ; DISABLE-NEXT: movq _irreducibleCFGf@{{.*}}(%rip), %rax ; DISABLE-NEXT: cmpb $0, (%rax) ; DISABLE-NEXT: je LBB16_2 @@ -1430,26 +1420,22 @@ ; DISABLE-NEXT: LBB16_2: ## %split ; DISABLE-NEXT: movq _irreducibleCFGb@{{.*}}(%rip), %rax ; DISABLE-NEXT: cmpl $0, (%rax) -; DISABLE-NEXT: je LBB16_3 -; DISABLE-NEXT: ## %bb.4: ## %for.body4.i +; DISABLE-NEXT: je LBB16_4 +; DISABLE-NEXT: ## %bb.3: ## %for.body4.i ; DISABLE-NEXT: movq _irreducibleCFGa@{{.*}}(%rip), %rax ; DISABLE-NEXT: movl (%rax), %edi -; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: callq _something -; DISABLE-NEXT: jmp LBB16_5 -; DISABLE-NEXT: LBB16_3: -; DISABLE-NEXT: xorl %ebx, %ebx +; DISABLE-NEXT: LBB16_4: ## %for.inc +; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: .p2align 4, 0x90 ; DISABLE-NEXT: LBB16_5: ## %for.inc ; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 -; DISABLE-NEXT: incl %ebx -; DISABLE-NEXT: cmpl $7, %ebx +; DISABLE-NEXT: incl %eax +; DISABLE-NEXT: cmpl $7, %eax ; DISABLE-NEXT: jl LBB16_5 ; DISABLE-NEXT: ## %bb.6: ## %fn1.exit ; DISABLE-NEXT: xorl %eax, %eax -; DISABLE-NEXT: addq $8, %rsp -; DISABLE-NEXT: popq %rbx ; DISABLE-NEXT: popq %rbp ; DISABLE-NEXT: retq entry: Index: llvm/test/DebugInfo/X86/live-debug-values.ll =================================================================== --- llvm/test/DebugInfo/X86/live-debug-values.ll +++ 
llvm/test/DebugInfo/X86/live-debug-values.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=asm %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -filetype=asm %s -o - | FileCheck %s ; Test the extension of debug ranges from predecessors. ; Generated from the source file LiveDebugValues.c: @@ -29,9 +29,9 @@ ; DBG_VALUE for variable "n" is extended into %bb.5 from its predecessors %bb.3 ; and %bb.4. +; CHECK: .LBB0_4: +; CHECK-NEXT: #DEBUG_VALUE: main:n <- $eax ; CHECK: .LBB0_5: -; CHECK-NEXT: #DEBUG_VALUE: main:n <- $ebx -; Other register values have been clobbered. ; CHECK-NOT: #DEBUG_VALUE: ; CHECK: movl %e{{..}}, m(%rip)