diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp --- a/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -99,7 +99,7 @@ EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, cl::desc("enable the shrink-wrapping pass")); static cl::opt<bool> EnablePostShrinkWrapOpt( - "enable-shrink-wrap-region-split", cl::init(false), cl::Hidden, + "enable-shrink-wrap-region-split", cl::init(true), cl::Hidden, cl::desc("enable splitting of the restore block if possible")); namespace { @@ -635,7 +635,10 @@ FindIDom<>(**DirtyPreds.begin(), DirtyPreds, *MDT, false); while (NewSave && (hasDirtyPred(ReachableByDirty, *NewSave) || - EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency())) + EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency() || + /*The entry frequency has been observed to exceed a loop block's + frequency in some cases, so also reject blocks inside a loop*/ + MLI->getLoopFor(NewSave))) NewSave = FindIDom<>(**NewSave->pred_begin(), NewSave->predecessors(), *MDT, false); diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll --- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll @@ -424,8 +424,8 @@ ; CHECK-NEXT: mov w8, wzr ; CHECK-NEXT: b .LBB5_7 ; CHECK-NEXT: .LBB5_3: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: b .LBB5_9 ; CHECK-NEXT: .LBB5_4: // %vector.ph ; CHECK-NEXT: and x11, x10, #0xfffffff0 ; CHECK-NEXT: add x8, x0, #8 diff --git a/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir b/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir --- a/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir +++ b/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir @@ -6,13 +6,12 @@ ; RUN: llc -x=mir -simplify-mir -run-pass=shrink-wrap -o - %s | FileCheck %s ; CHECK: name: compiler_pop_stack ; CHECK: frameInfo: - ; CHECK-NOT: savePoint: - ; 
CHECK-NOT: restorePoint: + ; CHECK: savePoint: '%bb.1' + ; CHECK: restorePoint: '%bb.7' ; CHECK: name: compiler_pop_stack_no_memoperands ; CHECK: frameInfo: - ; CHECK-NOT: savePoint: - ; CHECK-NOT: restorePoint: - ; CHECK: stack: + ; CHECK: savePoint: '%bb.1' + ; CHECK: restorePoint: '%bb.7' ; CHECK: name: f ; CHECK: frameInfo: ; CHECK: savePoint: '%bb.2' diff --git a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll --- a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll @@ -21,16 +21,16 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly %b) #9 { ; CHECK-LABEL: prune_match: ; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldrh w9, [x1] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne LBB0_47 +; CHECK-NEXT: ; %bb.1: ; %if.end ; CHECK-NEXT: sub sp, sp, #64 ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrh w9, [x1] -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne LBB0_42 -; CHECK-NEXT: ; %bb.1: ; %if.end ; CHECK-NEXT: Lloh0: ; CHECK-NEXT: adrp x14, __DefaultRuneLocale@GOTPAGE ; CHECK-NEXT: mov x9, xzr @@ -243,7 +243,7 @@ ; CHECK-NEXT: b.eq LBB0_37 ; CHECK-NEXT: LBB0_42: ; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: LBB0_43: ; %return +; CHECK-NEXT: LBB0_43: ; CHECK-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret @@ -259,6 +259,12 @@ ; CHECK-NEXT: ; %bb.46: ; %land.lhs.true52 ; CHECK-NEXT: cbz w8, LBB0_43 ; CHECK-NEXT: b LBB0_12 +; CHECK-NEXT: LBB0_47: +; CHECK-NEXT: .cfi_def_cfa wsp, 0 +; CHECK-NEXT: .cfi_same_value w30 +; CHECK-NEXT: .cfi_same_value w29 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret ; CHECK-NEXT: .loh AdrpLdrGot Lloh0, Lloh1 ; CHECK-NEXT: .loh AdrpLdrGot Lloh2, Lloh3 ; CHECK-NEXT: .loh AdrpLdrGot Lloh4, Lloh5 diff 
--git a/llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir b/llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir @@ -0,0 +1,760 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple=aarch64 -run-pass=shrink-wrap -o - %s | FileCheck %s + +--- | + define void @shrink_test1(i32 %a) { + entry: + %cmp5 = icmp sgt i32 %a, 0 + br i1 %cmp5, label %BB0, label %exit + + BB0: ; preds = %entry + %call = call i32 @fun() + %c = icmp eq i32 %call, 0 + br i1 %c, label %BB1, label %exit + + BB1: ; preds = %BB0 + %call2 = call i32 @fun() + br label %exit + + exit: ; preds = %BB1, %BB0, %entry + ret void + } + + define void @shrink_test2(i32 %a, ptr %P1, ptr %P2) { + BB00: + %cmp5 = icmp sgt i32 %a, 0 + br i1 %cmp5, label %BB01, label %exit + + BB01: ; preds = %BB00 + store i32 %a, ptr %P1, align 4 + %c1 = icmp sgt i32 %a, 1 + br i1 %c1, label %BB02, label %BB03 + + BB02: ; preds = %BB01 + store i32 %a, ptr %P2, align 4 + br label %BB03 + + BB03: ; preds = %BB02, %BB01 + %call03 = call i32 @fun() + %c03 = icmp eq i32 %call03, 0 + br i1 %c03, label %BB04, label %BB05 + + BB04: ; preds = %BB03 + %call04 = call i32 @fun() + br label %BB05 + + BB05: ; preds = %BB04, %BB03 + %call05 = call i32 @fun() + %c05 = icmp eq i32 %call05, 0 + br i1 %c05, label %BB06, label %BB07 + + BB06: ; preds = %BB05 + %call06 = call i32 @fun() + br label %exit + + BB07: ; preds = %BB05 + %call07 = call i32 @fun2() + br label %exit + + exit: ; preds = %BB07, %BB06, %BB00 + ret void + } + + define void @noshrink_test1(i32 %a, i32 %v, i32 %v2) { + entry: + %cmp5 = icmp sgt i32 %a, 0 + br i1 %cmp5, label %BB0, label %exit + + BB0: ; preds = %entry + %c = icmp eq i32 %a, 10 + %c1 = icmp eq i32 %v, 10 + %or.cond = select i1 %c, i1 %c1, i1 false + br i1 %or.cond, label %BB3, label %BB2 + + BB2: ; preds = %BB0 + %c2 = icmp eq 
i32 %v2, 10 + br i1 %c2, label %BB4, label %exit + + BB3: ; preds = %BB0 + %call3 = call i32 @fun() + br label %exit + + BB4: ; preds = %BB2 + %call4 = call i32 @fun2() + br label %exit + + exit: ; preds = %BB4, %BB3, %BB2, %entry + ret void + } + + define void @noshrink_test2(i32 %a) { + BB00: + %cmp5 = icmp sgt i32 %a, 0 + br i1 %cmp5, label %BB01, label %InfLoop.preheader + + InfLoop.preheader: ; preds = %BB00 + br label %InfLoop + + BB01: ; preds = %BB00 + %call = call i32 @fun() + %c = icmp eq i32 %call, 0 + br i1 %c, label %BB02, label %exit + + BB02: ; preds = %BB01 + %call2 = call i32 @fun() + br label %exit + + InfLoop: ; preds = %InfLoop.preheader, %InfLoop + %call3 = call i32 @fun() + br label %InfLoop + + exit: ; preds = %BB02, %BB01 + ret void + } + + define void @noshrink_test3(i32 %a) { + BB00: + %cmp5 = icmp sgt i32 %a, 0 + %call02 = call i32 @fun() + br i1 %cmp5, label %BB02, label %BB01 + + BB01: ; preds = %BB00 + %0 = icmp eq i32 %call02, 0 + br i1 %0, label %BB01.1, label %exit + + BB01.1: ; preds = %BB01 + call void @abort() #0 + unreachable + + BB02: ; preds = %BB00 + %1 = icmp eq i32 %call02, 0 + br i1 %1, label %BB03, label %BB04 + + BB03: ; preds = %BB02 + %call03 = call i32 @fun() + %c03 = icmp eq i32 %call03, 0 + br i1 %c03, label %BB04, label %exit + + BB04: ; preds = %BB03, %BB02 + %call04 = call i32 @fun() + br label %exit + + exit: ; preds = %BB04, %BB03, %BB01 + ret void + } + + define void @noshrink_bb_as_inlineasmbr_target(i1 %cond) { + entry: + br i1 %cond, label %0, label %exit + + 0: ; preds = %entry + callbr void asm sideeffect "", "!i,~{flags}"() + to label %1 [label %exit] + + 1: ; preds = %0 + call void @dosomething() + br label %exit + + exit: ; preds = %1, %0, %entry + ret void + } + + declare i32 @fun() + declare i32 @fun2() + declare void @abort() + declare void @dosomething() +... 
+--- +name: shrink_test1 +alignment: 4 +tracksRegLiveness: true +tracksDebugUserValues: true +liveins: + - { reg: '$w0' } +frameInfo: + maxAlignment: 1 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: shrink_test1 + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.3(0x30000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 11, %bb.3, implicit killed $nzcv + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.BB0: + ; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.4(0x50000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.4 + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.BB1: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: B %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.exit: + ; CHECK-NEXT: RET_ReallyLR + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: B %bb.3 + bb.0.entry: + successors: %bb.1(0x50000000), %bb.3(0x30000000) + liveins: $w0 + + dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv + Bcc 11, %bb.3, implicit killed $nzcv + B %bb.1 + + bb.1.BB0: + successors: %bb.2(0x30000000), %bb.3(0x50000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, 
implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + CBNZW killed renamable $w0, %bb.3 + B %bb.2 + + bb.2.BB1: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + bb.3.exit: + RET_ReallyLR + +... +--- +name: shrink_test2 +alignment: 4 +tracksRegLiveness: true +tracksDebugUserValues: true +liveins: + - { reg: '$w0' } + - { reg: '$x1' } + - { reg: '$x2' } +frameInfo: + maxAlignment: 1 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: shrink_test2 + ; CHECK: bb.0.BB00: + ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.8(0x30000000) + ; CHECK-NEXT: liveins: $w0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 11, %bb.8, implicit killed $nzcv + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.BB01: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: liveins: $w0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv + ; CHECK-NEXT: STRWui renamable $w0, killed renamable $x1, 0 :: (store (s32) into %ir.P1) + ; CHECK-NEXT: Bcc 11, %bb.3, implicit killed $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.BB02: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $w0, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: STRWui killed renamable $w0, killed renamable $x2, 0 :: (store (s32) into %ir.P2) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.BB03: + ; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.5(0x50000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, 
implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.5 + ; CHECK-NEXT: B %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.BB04: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.BB05: + ; CHECK-NEXT: successors: %bb.6(0x30000000), %bb.7(0x50000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.7 + ; CHECK-NEXT: B %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.BB06: + ; CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: B %bb.9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7.BB07: + ; CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: B %bb.9 + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: bb.8.exit: + ; CHECK-NEXT: RET_ReallyLR + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.9: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: B %bb.8 + bb.0.BB00: + successors: %bb.1(0x50000000), %bb.8(0x30000000) + liveins: $w0, $x1, $x2 + + dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv + Bcc 11, %bb.8, implicit killed $nzcv + B %bb.1 + + bb.1.BB01: + successors: %bb.2, %bb.3 + liveins: $w0, $x1, $x2 + + dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv + STRWui renamable $w0, killed renamable $x1, 0 :: (store (s32) into %ir.P1) + Bcc 11, %bb.3, implicit killed $nzcv + B %bb.2 + + bb.2.BB02: + liveins: $w0, $x2 + + STRWui killed renamable $w0, killed renamable $x2, 0 :: (store (s32) into %ir.P2) + + bb.3.BB03: + successors: %bb.4(0x30000000), %bb.5(0x50000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + CBNZW killed renamable $w0, %bb.5 + B %bb.4 + + bb.4.BB04: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + bb.5.BB05: + successors: %bb.6(0x30000000), %bb.7(0x50000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + CBNZW killed renamable $w0, %bb.7 + B %bb.6 + + bb.6.BB06: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + B %bb.8 + + bb.7.BB07: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, 
implicit $sp + BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + bb.8.exit: + RET_ReallyLR + +... +--- +name: noshrink_test1 +alignment: 4 +tracksRegLiveness: true +tracksDebugUserValues: true +liveins: + - { reg: '$w0' } + - { reg: '$w1' } + - { reg: '$w2' } +frameInfo: + maxAlignment: 1 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: noshrink_test1 + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.6(0x30000000) + ; CHECK-NEXT: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 11, %bb.6, implicit killed $nzcv + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.BB0: + ; CHECK-NEXT: successors: %bb.2(0x60000000), %bb.3(0x20000000) + ; CHECK-NEXT: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 10, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.BB0: + ; CHECK-NEXT: successors: %bb.4(0x55555555), %bb.3(0x2aaaaaab) + ; CHECK-NEXT: liveins: $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w1, 10, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.4, implicit killed $nzcv + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.BB2: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: liveins: $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w2, 10, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 0, %bb.5, implicit killed $nzcv + ; CHECK-NEXT: B %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.BB3: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, 
implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: B %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.BB4: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.exit: + ; CHECK-NEXT: RET_ReallyLR + bb.0.entry: + successors: %bb.1(0x50000000), %bb.6(0x30000000) + liveins: $w0, $w1, $w2 + + dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv + Bcc 11, %bb.6, implicit killed $nzcv + B %bb.1 + + bb.1.BB0: + successors: %bb.2(0x60000000), %bb.3(0x20000000) + liveins: $w0, $w1, $w2 + + dead $wzr = SUBSWri killed renamable $w0, 10, 0, implicit-def $nzcv + Bcc 1, %bb.3, implicit killed $nzcv + B %bb.2 + + bb.2.BB0: + successors: %bb.4(0x55555555), %bb.3(0x2aaaaaab) + liveins: $w1, $w2 + + dead $wzr = SUBSWri killed renamable $w1, 10, 0, implicit-def $nzcv + Bcc 0, %bb.4, implicit killed $nzcv + B %bb.3 + + bb.3.BB2: + liveins: $w2 + + dead $wzr = SUBSWri killed renamable $w2, 10, 0, implicit-def $nzcv + Bcc 0, %bb.5, implicit killed $nzcv + B %bb.6 + + bb.4.BB3: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + B %bb.6 + + bb.5.BB4: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + bb.6.exit: + 
RET_ReallyLR + +... +--- +name: noshrink_test2 +alignment: 4 +tracksRegLiveness: true +tracksDebugUserValues: true +liveins: + - { reg: '$w0' } +frameInfo: + maxAlignment: 1 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: noshrink_test2 + ; CHECK: bb.0.BB00: + ; CHECK-NEXT: successors: %bb.2(0x50000000), %bb.1(0x30000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 12, %bb.2, implicit killed $nzcv + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: B %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.BB01: + ; CHECK-NEXT: successors: %bb.3(0x30000000), %bb.5(0x50000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.5 + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.BB02: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: B %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.InfLoop: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + 
; CHECK-NEXT: B %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.exit: + ; CHECK-NEXT: RET_ReallyLR + bb.0.BB00: + successors: %bb.2(0x50000000), %bb.1(0x30000000) + liveins: $w0 + + dead $wzr = SUBSWri killed renamable $w0, 0, 0, implicit-def $nzcv + Bcc 12, %bb.2, implicit killed $nzcv + + bb.1: + B %bb.4 + + bb.2.BB01: + successors: %bb.3(0x30000000), %bb.5(0x50000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + CBNZW killed renamable $w0, %bb.5 + B %bb.3 + + bb.3.BB02: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + B %bb.5 + + bb.4.InfLoop: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + B %bb.4 + + bb.5.exit: + RET_ReallyLR + +... 
+--- +name: noshrink_test3 +alignment: 4 +tracksRegLiveness: true +tracksDebugUserValues: true +liveins: + - { reg: '$w0' } +frameInfo: + maxAlignment: 1 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: noshrink_test3 + ; CHECK: bb.0.BB00: + ; CHECK-NEXT: successors: %bb.3(0x50000000), %bb.1(0x30000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w19 = COPY $w0 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w19, 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 12, %bb.3, implicit killed $nzcv + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.BB01: + ; CHECK-NEXT: successors: %bb.2(0x00000800), %bb.6(0x7ffff800) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.6 + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.BB01.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @abort, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.BB02: + ; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.5(0x50000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.5 + ; CHECK-NEXT: B %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.BB03: + ; CHECK-NEXT: successors: %bb.5(0x30000000), %bb.6(0x50000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, 
implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.6 + ; CHECK-NEXT: B %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.BB04: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.exit: + ; CHECK-NEXT: RET_ReallyLR + bb.0.BB00: + successors: %bb.3(0x50000000), %bb.1(0x30000000) + liveins: $w0 + + renamable $w19 = COPY $w0 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + dead $wzr = SUBSWri killed renamable $w19, 0, 0, implicit-def $nzcv + Bcc 12, %bb.3, implicit killed $nzcv + B %bb.1 + + bb.1.BB01: + successors: %bb.2(0x00000800), %bb.6(0x7ffff800) + liveins: $w0 + + CBNZW killed renamable $w0, %bb.6 + B %bb.2 + + bb.2.BB01.1: + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @abort, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + bb.3.BB02: + successors: %bb.4(0x30000000), %bb.5(0x50000000) + liveins: $w0 + + CBNZW killed renamable $w0, %bb.5 + B %bb.4 + + bb.4.BB03: + successors: %bb.5(0x30000000), %bb.6(0x50000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + CBNZW killed renamable $w0, %bb.6 + B %bb.5 + + bb.5.BB04: + ADJCALLSTACKDOWN 0, 0, 
implicit-def dead $sp, implicit $sp + BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + bb.6.exit: + RET_ReallyLR + +... +--- +name: noshrink_bb_as_inlineasmbr_target +registers: [] +liveins: + - { reg: '$w0', virtual-reg: '' } +frameInfo: + savePoint: '' + restorePoint: '' +body: | + ; CHECK-LABEL: name: noshrink_bb_as_inlineasmbr_target + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: TBZW killed renamable $w0, 0, %bb.3 + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1 (%ir-block.0): + ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.3(0x00000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: INLINEASM_BR &"", 1 /* sideeffect attdialect */, 13 /* imm */, %bb.3 + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2 (%ir-block.1): + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @dosomething, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.exit (machine-block-address-taken, inlineasm-br-indirect-target): + ; CHECK-NEXT: RET_ReallyLR + bb.0.entry: + successors: %bb.1(0x40000000), %bb.3(0x40000000) + liveins: $w0 + + TBZW killed renamable $w0, 0, %bb.3 + B %bb.1 + + bb.1 (%ir-block.0): + successors: %bb.2(0x80000000), %bb.3(0x00000000) + + INLINEASM_BR &"", 1 /* sideeffect attdialect */, 13 /* imm */, %bb.3 + B %bb.2 + + bb.2 (%ir-block.1): + successors: %bb.3(0x80000000) + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @dosomething, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, 
implicit $sp + + bb.3.exit (machine-block-address-taken, inlineasm-br-indirect-target): + RET_ReallyLR + +... diff --git a/llvm/test/CodeGen/AArch64/taildup-cfi.ll b/llvm/test/CodeGen/AArch64/taildup-cfi.ll --- a/llvm/test/CodeGen/AArch64/taildup-cfi.ll +++ b/llvm/test/CodeGen/AArch64/taildup-cfi.ll @@ -32,7 +32,7 @@ store i32 0, ptr @f, align 4, !tbaa !2 br label %if.end -; DARWIN-NOT: Merging into block +; DARWIN: Merging into block ; LINUX: Merging into block if.end: ; preds = %entry.if.end_crit_edge, %if.then diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll --- a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll @@ -5,11 +5,11 @@ define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LE-LABEL: add_user: ; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .save {r4, lr} -; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: cmp r0, #1 ; CHECK-LE-NEXT: blt .LBB0_4 ; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-LE-NEXT: .save {r4, lr} +; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: sub.w lr, r3, #2 ; CHECK-LE-NEXT: subs r2, #2 ; CHECK-LE-NEXT: mov.w r12, #0 @@ -22,22 +22,23 @@ ; CHECK-LE-NEXT: sxtah r1, r1, r3 ; CHECK-LE-NEXT: smlad r12, r4, r3, r12 ; CHECK-LE-NEXT: bne .LBB0_2 -; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-LE-NEXT: @ %bb.3: +; CHECK-LE-NEXT: pop.w {r4, lr} ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: bx lr ; CHECK-LE-NEXT: .LBB0_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: add_user: ; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .save {r4, r5, r7, lr} -; CHECK-BE-NEXT: push {r4, r5, r7, lr} ; CHECK-BE-NEXT: cmp r0, #1 ; CHECK-BE-NEXT: blt .LBB0_4 ; CHECK-BE-NEXT: @ 
%bb.1: @ %for.body.preheader +; CHECK-BE-NEXT: .save {r4, r5, r7, lr} +; CHECK-BE-NEXT: push {r4, r5, r7, lr} ; CHECK-BE-NEXT: subs r3, #2 ; CHECK-BE-NEXT: subs r2, #2 ; CHECK-BE-NEXT: mov.w r12, #0 @@ -53,14 +54,15 @@ ; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2] ; CHECK-BE-NEXT: smlabb r12, r5, r4, r12 ; CHECK-BE-NEXT: bne .LBB0_2 -; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-BE-NEXT: @ %bb.3: +; CHECK-BE-NEXT: pop.w {r4, r5, r7, lr} ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: pop {r4, r5, r7, pc} +; CHECK-BE-NEXT: bx lr ; CHECK-BE-NEXT: .LBB0_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: pop {r4, r5, r7, pc} +; CHECK-BE-NEXT: bx lr entry: %cmp24 = icmp sgt i32 %arg, 0 br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup @@ -105,11 +107,11 @@ define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LE-LABEL: mul_bottom_user: ; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .save {r4, lr} -; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: cmp r0, #1 ; CHECK-LE-NEXT: blt .LBB1_4 ; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-LE-NEXT: .save {r4, lr} +; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: sub.w lr, r3, #2 ; CHECK-LE-NEXT: subs r2, #2 ; CHECK-LE-NEXT: mov.w r12, #0 @@ -123,22 +125,23 @@ ; CHECK-LE-NEXT: sxth r3, r3 ; CHECK-LE-NEXT: mul r1, r3, r1 ; CHECK-LE-NEXT: bne .LBB1_2 -; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-LE-NEXT: @ %bb.3: +; CHECK-LE-NEXT: pop.w {r4, lr} ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: bx lr ; CHECK-LE-NEXT: .LBB1_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: mul_bottom_user: ; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .save {r4, r5, r7, lr} -; CHECK-BE-NEXT: push {r4, r5, 
r7, lr} ; CHECK-BE-NEXT: cmp r0, #1 ; CHECK-BE-NEXT: blt .LBB1_4 ; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-BE-NEXT: .save {r4, r5, r7, lr} +; CHECK-BE-NEXT: push {r4, r5, r7, lr} ; CHECK-BE-NEXT: subs r3, #2 ; CHECK-BE-NEXT: subs r2, #2 ; CHECK-BE-NEXT: mov.w r12, #0 @@ -154,14 +157,15 @@ ; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2] ; CHECK-BE-NEXT: smlabb r12, r5, r4, r12 ; CHECK-BE-NEXT: bne .LBB1_2 -; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-BE-NEXT: @ %bb.3: +; CHECK-BE-NEXT: pop.w {r4, r5, r7, lr} ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: pop {r4, r5, r7, pc} +; CHECK-BE-NEXT: bx lr ; CHECK-BE-NEXT: .LBB1_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: pop {r4, r5, r7, pc} +; CHECK-BE-NEXT: bx lr entry: %cmp24 = icmp sgt i32 %arg, 0 br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup @@ -206,11 +210,11 @@ define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LE-LABEL: mul_top_user: ; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .save {r4, lr} -; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: cmp r0, #1 ; CHECK-LE-NEXT: blt .LBB2_4 ; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-LE-NEXT: .save {r4, lr} +; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: subs r3, #2 ; CHECK-LE-NEXT: subs r2, #2 ; CHECK-LE-NEXT: mov.w r12, #0 @@ -224,22 +228,23 @@ ; CHECK-LE-NEXT: asr.w r4, r4, #16 ; CHECK-LE-NEXT: mul r1, r4, r1 ; CHECK-LE-NEXT: bne .LBB2_2 -; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-LE-NEXT: @ %bb.3: +; CHECK-LE-NEXT: pop.w {r4, lr} ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: bx lr ; CHECK-LE-NEXT: .LBB2_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: mul_top_user: ; CHECK-BE: @ %bb.0: @ 
%entry -; CHECK-BE-NEXT: .save {r4, lr} -; CHECK-BE-NEXT: push {r4, lr} ; CHECK-BE-NEXT: cmp r0, #1 ; CHECK-BE-NEXT: blt .LBB2_4 ; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-BE-NEXT: .save {r4, lr} +; CHECK-BE-NEXT: push {r4, lr} ; CHECK-BE-NEXT: subs r3, #2 ; CHECK-BE-NEXT: subs r2, #2 ; CHECK-BE-NEXT: mov.w r12, #0 @@ -255,14 +260,15 @@ ; CHECK-BE-NEXT: mul r1, r4, r1 ; CHECK-BE-NEXT: smlabb r12, r4, lr, r12 ; CHECK-BE-NEXT: bne .LBB2_2 -; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-BE-NEXT: @ %bb.3: +; CHECK-BE-NEXT: pop.w {r4, lr} ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: pop {r4, pc} +; CHECK-BE-NEXT: bx lr ; CHECK-BE-NEXT: .LBB2_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: pop {r4, pc} +; CHECK-BE-NEXT: bx lr entry: %cmp24 = icmp sgt i32 %arg, 0 br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup @@ -307,11 +313,11 @@ define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LE-LABEL: and_user: ; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .save {r4, lr} -; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: cmp r0, #1 ; CHECK-LE-NEXT: blt .LBB3_4 ; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-LE-NEXT: .save {r4, lr} +; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: sub.w lr, r3, #2 ; CHECK-LE-NEXT: subs r2, #2 ; CHECK-LE-NEXT: mov.w r12, #0 @@ -325,22 +331,23 @@ ; CHECK-LE-NEXT: uxth r3, r3 ; CHECK-LE-NEXT: mul r1, r3, r1 ; CHECK-LE-NEXT: bne .LBB3_2 -; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-LE-NEXT: @ %bb.3: +; CHECK-LE-NEXT: pop.w {r4, lr} ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: bx lr ; CHECK-LE-NEXT: .LBB3_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: pop {r4, pc} +; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: and_user: ; CHECK-BE: @ %bb.0: @ 
%entry -; CHECK-BE-NEXT: .save {r4, r5, r7, lr} -; CHECK-BE-NEXT: push {r4, r5, r7, lr} ; CHECK-BE-NEXT: cmp r0, #1 ; CHECK-BE-NEXT: blt .LBB3_4 ; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-BE-NEXT: .save {r4, r5, r7, lr} +; CHECK-BE-NEXT: push {r4, r5, r7, lr} ; CHECK-BE-NEXT: subs r3, #2 ; CHECK-BE-NEXT: subs r2, #2 ; CHECK-BE-NEXT: mov.w r12, #0 @@ -356,14 +363,15 @@ ; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2] ; CHECK-BE-NEXT: smlabb r12, r5, r4, r12 ; CHECK-BE-NEXT: bne .LBB3_2 -; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-BE-NEXT: @ %bb.3: +; CHECK-BE-NEXT: pop.w {r4, r5, r7, lr} ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: pop {r4, r5, r7, pc} +; CHECK-BE-NEXT: bx lr ; CHECK-BE-NEXT: .LBB3_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: pop {r4, r5, r7, pc} +; CHECK-BE-NEXT: bx lr entry: %cmp24 = icmp sgt i32 %arg, 0 br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/ARM/code-placement.ll b/llvm/test/CodeGen/ARM/code-placement.ll --- a/llvm/test/CodeGen/ARM/code-placement.ll +++ b/llvm/test/CodeGen/ARM/code-placement.ll @@ -11,7 +11,6 @@ br i1 %0, label %bb2, label %bb bb: -; CHECK: LBB0_1: ; CHECK: LBB0_[[LABEL:[0-9]]]: ; CHECK: bne LBB0_[[LABEL]] ; CHECK-NOT: b LBB0_[[LABEL]] diff --git a/llvm/test/CodeGen/ARM/mbp.ll b/llvm/test/CodeGen/ARM/mbp.ll --- a/llvm/test/CodeGen/ARM/mbp.ll +++ b/llvm/test/CodeGen/ARM/mbp.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s | FileCheck %s target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv7-unknown-linux-gnueabihf" @@ -6,16 +7,50 @@ %List = type { i32, ptr } ; The entry block should be the first block of the function. 
-; CHECK-LABEL: foo -; CHECK: %entry -; CHECK: %for.body -; CHECK: %for.inc -; CHECK: %if.then -; CHECK: %for.cond.i -; CHECK: %for.body.i -; CHECK: %return define i1 @foo(ptr %ha, i32 %he) !prof !39 { +; CHECK-LABEL: foo: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: ldr r2, [r0] +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: itt eq +; CHECK-NEXT: moveq r0, #0 +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB0_1: @ %for.body.preheader +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_2: @ %for.inc +; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: ldr r2, [r2] +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: it eq +; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB0_3: @ %for.body +; CHECK-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-NEXT: @ Child Loop BB0_5 Depth 2 +; CHECK-NEXT: ldr r0, [r2, #4] +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: beq .LBB0_2 +; CHECK-NEXT: @ %bb.4: @ %if.then +; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: ldrd r3, r0, [r0] +; CHECK-NEXT: sub.w r12, r0, #4 +; CHECK-NEXT: .LBB0_5: @ %for.cond.i +; CHECK-NEXT: @ Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-NEXT: cmp r3, #1 +; CHECK-NEXT: blt .LBB0_2 +; CHECK-NEXT: @ %bb.6: @ %for.body.i +; CHECK-NEXT: @ in Loop: Header=BB0_5 Depth=2 +; CHECK-NEXT: ldr.w lr, [r12, r3, lsl #2] +; CHECK-NEXT: subs r3, #1 +; CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: cmp lr, r1 +; CHECK-NEXT: bne .LBB0_5 +; CHECK-NEXT: @ %bb.7: +; CHECK-NEXT: pop {r7, pc} entry: %TargetPtr = load ptr, ptr %ha, align 4 %cmp1 = icmp eq ptr %TargetPtr, null diff --git a/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll --- a/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll +++ b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll @@ -6,11 +6,11 @@ define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) { ; CHECK-LABEL: ssat_unroll: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: 
cmp r3, #0 +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB0_1: @ %while.body.preheader ; CHECK-NEXT: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: beq .LBB0_5 -; CHECK-NEXT: @ %bb.1: @ %while.body.preheader ; CHECK-NEXT: sub r12, r3, #1 ; CHECK-NEXT: tst r3, #1 ; CHECK-NEXT: beq .LBB0_3 @@ -23,7 +23,7 @@ ; CHECK-NEXT: mov r3, r12 ; CHECK-NEXT: .LBB0_3: @ %while.body.prol.loopexit ; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: popeq {r11, pc} +; CHECK-NEXT: beq .LBB0_5 ; CHECK-NEXT: .LBB0_4: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrsh r12, [r0] @@ -41,8 +41,9 @@ ; CHECK-NEXT: strh r12, [r2, #2] ; CHECK-NEXT: add r2, r2, #4 ; CHECK-NEXT: bne .LBB0_4 -; CHECK-NEXT: .LBB0_5: @ %while.end -; CHECK-NEXT: pop {r11, pc} +; CHECK-NEXT: .LBB0_5: +; CHECK-NEXT: pop {r11, lr} +; CHECK-NEXT: bx lr entry: %cmp.not7 = icmp eq i32 %blockSize, 0 br i1 %cmp.not7, label %while.end, label %while.body.preheader @@ -125,11 +126,11 @@ define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture readonly %pSrcB, ptr nocapture writeonly %pDst, i32 %blockSize) { ; CHECK-LABEL: ssat_unroll_minmax: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB1_1: @ %while.body.preheader ; CHECK-NEXT: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: beq .LBB1_5 -; CHECK-NEXT: @ %bb.1: @ %while.body.preheader ; CHECK-NEXT: sub r12, r3, #1 ; CHECK-NEXT: tst r3, #1 ; CHECK-NEXT: beq .LBB1_3 @@ -142,7 +143,7 @@ ; CHECK-NEXT: mov r3, r12 ; CHECK-NEXT: .LBB1_3: @ %while.body.prol.loopexit ; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: popeq {r11, pc} +; CHECK-NEXT: beq .LBB1_5 ; CHECK-NEXT: .LBB1_4: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrsh r12, [r0] @@ -160,8 +161,9 @@ ; CHECK-NEXT: strh r12, [r2, #2] ; CHECK-NEXT: add r2, r2, #4 ; CHECK-NEXT: bne .LBB1_4 -; CHECK-NEXT: .LBB1_5: @ %while.end -; CHECK-NEXT: pop 
{r11, pc} +; CHECK-NEXT: .LBB1_5: +; CHECK-NEXT: pop {r11, lr} +; CHECK-NEXT: bx lr entry: %cmp.not7 = icmp eq i32 %blockSize, 0 br i1 %cmp.not7, label %while.end, label %while.body.preheader diff --git a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll --- a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll +++ b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll @@ -39,19 +39,19 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cmplwi r6, 0 ; CHECK-NEXT: cmpwi cr1, r6, 0 -; CHECK-NEXT: stw r30, -8(r1) # 4-byte Folded Spill -; CHECK-NEXT: stw r31, -4(r1) # 4-byte Folded Spill ; CHECK-NEXT: crandc 4*cr5+lt, 4*cr1+lt, eq ; CHECK-NEXT: cmpwi cr1, r7, 0 -; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_5 +; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_6 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: crand 4*cr5+lt, eq, 4*cr1+eq -; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_5 +; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_6 ; CHECK-NEXT: # %bb.2: # %for.body.preheader ; CHECK-NEXT: slwi r8, r4, 1 ; CHECK-NEXT: li r10, 0 ; CHECK-NEXT: li r11, 0 +; CHECK-NEXT: stw r30, -8(r1) # 4-byte Folded Spill ; CHECK-NEXT: add r8, r4, r8 +; CHECK-NEXT: stw r31, -4(r1) # 4-byte Folded Spill ; CHECK-NEXT: add r9, r5, r8 ; CHECK-NEXT: add r5, r5, r4 ; CHECK-NEXT: add r8, r3, r5 @@ -83,15 +83,15 @@ ; CHECK-NEXT: # ; CHECK-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt ; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_3 -; CHECK-NEXT: b L..BB0_6 -; CHECK-NEXT: L..BB0_5: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: li r5, 0 -; CHECK-NEXT: L..BB0_6: # %for.cond.cleanup +; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: lwz r31, -4(r1) # 4-byte Folded Reload ; CHECK-NEXT: lwz r30, -8(r1) # 4-byte Folded Reload ; CHECK-NEXT: mr r4, r5 ; CHECK-NEXT: blr +; CHECK-NEXT: L..BB0_6: +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: blr entry: %add = add nsw i32 %base1, %offset %mul = shl nsw i32 %offset, 1 diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll --- 
a/llvm/test/CodeGen/PowerPC/common-chain.ll +++ b/llvm/test/CodeGen/PowerPC/common-chain.ll @@ -137,14 +137,14 @@ ; CHECK-LABEL: not_perfect_chain_all_same_offset_fail: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cmpdi r6, 0 -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: ble cr0, .LBB1_4 ; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: sldi r7, r4, 1 -; CHECK-NEXT: sldi r9, r4, 2 ; CHECK-NEXT: add r5, r3, r5 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: add r8, r4, r7 +; CHECK-NEXT: sldi r9, r4, 2 ; CHECK-NEXT: mtctr r6 ; CHECK-NEXT: add r10, r4, r9 ; CHECK-NEXT: .p2align 4 @@ -161,12 +161,11 @@ ; CHECK-NEXT: mulld r6, r6, r0 ; CHECK-NEXT: maddld r3, r6, r30, r3 ; CHECK-NEXT: bdnz .LBB1_2 -; CHECK-NEXT: # %bb.3: # %for.cond.cleanup +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr entry: %mul = shl nsw i64 %offset, 1 @@ -425,20 +424,20 @@ ; CHECK-LABEL: not_same_offset_fail: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cmpdi r6, 0 +; CHECK-NEXT: ble cr0, .LBB4_4 +; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-NEXT: add r5, r3, r5 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: ble cr0, .LBB4_3 -; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: mtctr r6 ; CHECK-NEXT: mulli r11, r4, 10 ; CHECK-NEXT: sldi r8, r4, 2 -; CHECK-NEXT: add r5, r3, r5 -; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: add r8, r4, r8 ; CHECK-NEXT: sldi r9, r4, 3 -; CHECK-NEXT: mtctr r6 -; CHECK-NEXT: sldi r7, r4, 1 ; CHECK-NEXT: sub r10, r9, r4 +; CHECK-NEXT: sldi r7, r4, 1 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB4_2: # %for.body ; CHECK-NEXT: # @@ -455,14 +454,14 @@ ; CHECK-NEXT: mulld r6, r6, r29 ; 
CHECK-NEXT: maddld r3, r6, r28, r3 ; CHECK-NEXT: bdnz .LBB4_2 -; CHECK-NEXT: b .LBB4_4 -; CHECK-NEXT: .LBB4_3: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: .LBB4_4: # %for.cond.cleanup +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr +; CHECK-NEXT: .LBB4_4: +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: blr entry: %mul = shl nsw i64 %offset, 1 %mul2 = mul nsw i64 %offset, 5 diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll --- a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll @@ -192,21 +192,21 @@ ; CHECK-LABEL: test_max_number_reminder: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmplwi r4, 0 -; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: beq cr0, .LBB2_3 +; CHECK-NEXT: beq cr0, .LBB2_4 ; CHECK-NEXT: # %bb.1: # %bb3.preheader ; CHECK-NEXT: cmpldi r4, 1 ; CHECK-NEXT: li r5, 1 ; CHECK-NEXT: addi r9, r3, 4002 +; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill ; CHECK-NEXT: li r6, -1 +; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill ; CHECK-NEXT: li r7, 3 ; CHECK-NEXT: li r8, 5 ; CHECK-NEXT: li r10, 9 +; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: iselgt r3, r4, r5 ; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: li r3, 0 @@ -232,10 +232,7 @@ ; CHECK-NEXT: mulld r11, r11, r26 ; CHECK-NEXT: maddld r3, r11, r25, r3 ; CHECK-NEXT: bdnz .LBB2_2 -; 
CHECK-NEXT: b .LBB2_4 -; CHECK-NEXT: .LBB2_3: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: .LBB2_4: # %bb45 +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload @@ -244,6 +241,9 @@ ; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r25, -56(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr +; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: addi r3, r4, 0 +; CHECK-NEXT: blr bb: %i = sext i32 %arg1 to i64 %i2 = icmp eq i32 %arg1, 0 @@ -475,11 +475,11 @@ ; CHECK-LABEL: test_ds_multiple_chains: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmplwi r5, 0 -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: beq cr0, .LBB5_3 +; CHECK-NEXT: beq cr0, .LBB5_4 ; CHECK-NEXT: # %bb.1: # %bb4.preheader ; CHECK-NEXT: cmpldi r5, 1 ; CHECK-NEXT: li r6, 1 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: addi r3, r3, 4001 ; CHECK-NEXT: addi r4, r4, 4001 ; CHECK-NEXT: li r7, 9 @@ -507,13 +507,13 @@ ; CHECK-NEXT: mulld r8, r8, r30 ; CHECK-NEXT: maddld r6, r8, r9, r6 ; CHECK-NEXT: bdnz .LBB5_2 -; CHECK-NEXT: b .LBB5_4 -; CHECK-NEXT: .LBB5_3: -; CHECK-NEXT: li r6, 0 -; CHECK-NEXT: .LBB5_4: # %bb43 +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: add r3, r6, r5 ; CHECK-NEXT: blr +; CHECK-NEXT: .LBB5_4: +; CHECK-NEXT: addi r3, r5, 0 +; CHECK-NEXT: blr bb: %i = sext i32 %arg2 to i64 %i3 = icmp eq i32 %arg2, 0 @@ -595,17 +595,17 @@ ; CHECK-LABEL: test_ds_cross_basic_blocks: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmplwi r4, 0 -; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: beq cr0, .LBB6_8 +; CHECK-NEXT: beq cr0, .LBB6_9 ; CHECK-NEXT: # %bb.1: # %bb3 ; CHECK-NEXT: addis r5, r2, .LC0@toc@ha ; CHECK-NEXT: cmpldi r4, 1 ; CHECK-NEXT: li r7, 1 ; CHECK-NEXT: addi r6, r3, 4009 +; 
CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-NEXT: ld r5, .LC0@toc@l(r5) ; CHECK-NEXT: iselgt r3, r4, r7 +; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: li r4, -7 ; CHECK-NEXT: li r8, -6 ; CHECK-NEXT: li r9, 1 @@ -634,7 +634,7 @@ ; CHECK-NEXT: mulld r0, r0, r10 ; CHECK-NEXT: mulld r0, r0, r9 ; CHECK-NEXT: maddld r3, r0, r7, r3 -; CHECK-NEXT: bdz .LBB6_9 +; CHECK-NEXT: bdz .LBB6_8 ; CHECK-NEXT: .LBB6_4: # %bb5 ; CHECK-NEXT: # ; CHECK-NEXT: lbzu r0, 1(r5) @@ -666,12 +666,13 @@ ; CHECK-NEXT: add r7, r0, r7 ; CHECK-NEXT: b .LBB6_3 ; CHECK-NEXT: .LBB6_8: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: .LBB6_9: # %bb64 ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr +; CHECK-NEXT: .LBB6_9: +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: blr bb: %i = sext i32 %arg1 to i64 %i2 = icmp eq i32 %arg1, 0 diff --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll --- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll +++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll @@ -6,24 +6,24 @@ ; CHECK-LABEL: foo: ; CHECK: # %bb.0: ; CHECK-NEXT: cmpd 5, 7 -; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill -; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill -; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill -; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill -; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill +; CHECK-NEXT: bgelr 0 +; CHECK-NEXT: # %bb.1: # %.preheader ; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill +; CHECK-NEXT: addi 27, 5, 2 ; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill -; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill +; CHECK-NEXT: addi 28, 5, 3 ; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill -; CHECK-NEXT: bge 0, .LBB0_6 -; CHECK-NEXT: # %bb.1: # %.preheader ; CHECK-NEXT: addi 30, 5, 1 -; 
CHECK-NEXT: addi 28, 5, 3 -; CHECK-NEXT: addi 27, 5, 2 ; CHECK-NEXT: mulld 12, 8, 5 -; CHECK-NEXT: addi 29, 3, 16 ; CHECK-NEXT: mulld 0, 9, 8 +; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill +; CHECK-NEXT: addi 29, 3, 16 ; CHECK-NEXT: sldi 11, 10, 3 +; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill +; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill +; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill +; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill ; CHECK-NEXT: mulld 30, 8, 30 ; CHECK-NEXT: mulld 28, 8, 28 ; CHECK-NEXT: mulld 8, 8, 27 diff --git a/llvm/test/CodeGen/PowerPC/shrink-wrap.ll b/llvm/test/CodeGen/PowerPC/shrink-wrap.ll --- a/llvm/test/CodeGen/PowerPC/shrink-wrap.ll +++ b/llvm/test/CodeGen/PowerPC/shrink-wrap.ll @@ -7,6 +7,9 @@ ; POWERPC64-LABEL: shrinkwrapme: ; POWERPC64: # %bb.0: # %entry ; POWERPC64-NEXT: cmpwi 4, 0 +; POWERPC64-NEXT: ble 0, .LBB0_4 +; POWERPC64-NEXT: # %bb.1: # %for.body.preheader +; POWERPC64-NEXT: addi 4, 4, -1 ; POWERPC64-NEXT: std 14, -144(1) # 8-byte Folded Spill ; POWERPC64-NEXT: std 15, -136(1) # 8-byte Folded Spill ; POWERPC64-NEXT: std 16, -128(1) # 8-byte Folded Spill @@ -22,14 +25,11 @@ ; POWERPC64-NEXT: std 26, -48(1) # 8-byte Folded Spill ; POWERPC64-NEXT: std 27, -40(1) # 8-byte Folded Spill ; POWERPC64-NEXT: std 28, -32(1) # 8-byte Folded Spill +; POWERPC64-NEXT: clrldi 4, 4, 32 +; POWERPC64-NEXT: addi 4, 4, 1 ; POWERPC64-NEXT: std 29, -24(1) # 8-byte Folded Spill ; POWERPC64-NEXT: std 30, -16(1) # 8-byte Folded Spill ; POWERPC64-NEXT: std 31, -8(1) # 8-byte Folded Spill -; POWERPC64-NEXT: ble 0, .LBB0_3 -; POWERPC64-NEXT: # %bb.1: # %for.body.preheader -; POWERPC64-NEXT: addi 4, 4, -1 -; POWERPC64-NEXT: clrldi 4, 4, 32 -; POWERPC64-NEXT: addi 4, 4, 1 ; POWERPC64-NEXT: mtctr 4 ; POWERPC64-NEXT: li 4, 0 ; POWERPC64-NEXT: .p2align 4 @@ -39,10 +39,7 @@ ; POWERPC64-NEXT: add 4, 3, 4 ; POWERPC64-NEXT: #NO_APP ; POWERPC64-NEXT: bdnz .LBB0_2 -; POWERPC64-NEXT: b 
.LBB0_4 -; POWERPC64-NEXT: .LBB0_3: -; POWERPC64-NEXT: li 4, 0 -; POWERPC64-NEXT: .LBB0_4: # %for.cond.cleanup +; POWERPC64-NEXT: # %bb.3: ; POWERPC64-NEXT: ld 31, -8(1) # 8-byte Folded Reload ; POWERPC64-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; POWERPC64-NEXT: ld 29, -24(1) # 8-byte Folded Reload @@ -63,10 +60,16 @@ ; POWERPC64-NEXT: ld 15, -136(1) # 8-byte Folded Reload ; POWERPC64-NEXT: ld 14, -144(1) # 8-byte Folded Reload ; POWERPC64-NEXT: blr +; POWERPC64-NEXT: .LBB0_4: +; POWERPC64-NEXT: li 4, 0 +; POWERPC64-NEXT: extsw 3, 4 +; POWERPC64-NEXT: blr ; ; POWERPC32-AIX-LABEL: shrinkwrapme: ; POWERPC32-AIX: # %bb.0: # %entry ; POWERPC32-AIX-NEXT: cmpwi 4, 0 +; POWERPC32-AIX-NEXT: ble 0, L..BB0_4 +; POWERPC32-AIX-NEXT: # %bb.1: # %for.body.preheader ; POWERPC32-AIX-NEXT: stw 14, -72(1) # 4-byte Folded Spill ; POWERPC32-AIX-NEXT: stw 15, -68(1) # 4-byte Folded Spill ; POWERPC32-AIX-NEXT: stw 16, -64(1) # 4-byte Folded Spill @@ -85,8 +88,6 @@ ; POWERPC32-AIX-NEXT: stw 29, -12(1) # 4-byte Folded Spill ; POWERPC32-AIX-NEXT: stw 30, -8(1) # 4-byte Folded Spill ; POWERPC32-AIX-NEXT: stw 31, -4(1) # 4-byte Folded Spill -; POWERPC32-AIX-NEXT: ble 0, L..BB0_3 -; POWERPC32-AIX-NEXT: # %bb.1: # %for.body.preheader ; POWERPC32-AIX-NEXT: mtctr 4 ; POWERPC32-AIX-NEXT: li 4, 0 ; POWERPC32-AIX-NEXT: .align 4 @@ -96,10 +97,7 @@ ; POWERPC32-AIX-NEXT: add 4, 3, 4 ; POWERPC32-AIX-NEXT: #NO_APP ; POWERPC32-AIX-NEXT: bdnz L..BB0_2 -; POWERPC32-AIX-NEXT: b L..BB0_4 -; POWERPC32-AIX-NEXT: L..BB0_3: -; POWERPC32-AIX-NEXT: li 4, 0 -; POWERPC32-AIX-NEXT: L..BB0_4: # %for.cond.cleanup +; POWERPC32-AIX-NEXT: # %bb.3: ; POWERPC32-AIX-NEXT: lwz 31, -4(1) # 4-byte Folded Reload ; POWERPC32-AIX-NEXT: lwz 30, -8(1) # 4-byte Folded Reload ; POWERPC32-AIX-NEXT: lwz 29, -12(1) # 4-byte Folded Reload @@ -120,10 +118,16 @@ ; POWERPC32-AIX-NEXT: lwz 15, -68(1) # 4-byte Folded Reload ; POWERPC32-AIX-NEXT: lwz 14, -72(1) # 4-byte Folded Reload ; POWERPC32-AIX-NEXT: blr +; POWERPC32-AIX-NEXT: L..BB0_4: 
+; POWERPC32-AIX-NEXT: li 3, 0 +; POWERPC32-AIX-NEXT: blr ; ; POWERPC64-AIX-LABEL: shrinkwrapme: ; POWERPC64-AIX: # %bb.0: # %entry ; POWERPC64-AIX-NEXT: cmpwi 4, 1 +; POWERPC64-AIX-NEXT: blt 0, L..BB0_4 +; POWERPC64-AIX-NEXT: # %bb.1: # %for.body.preheader +; POWERPC64-AIX-NEXT: addi 4, 4, -1 ; POWERPC64-AIX-NEXT: std 14, -144(1) # 8-byte Folded Spill ; POWERPC64-AIX-NEXT: std 15, -136(1) # 8-byte Folded Spill ; POWERPC64-AIX-NEXT: std 16, -128(1) # 8-byte Folded Spill @@ -139,14 +143,11 @@ ; POWERPC64-AIX-NEXT: std 26, -48(1) # 8-byte Folded Spill ; POWERPC64-AIX-NEXT: std 27, -40(1) # 8-byte Folded Spill ; POWERPC64-AIX-NEXT: std 28, -32(1) # 8-byte Folded Spill +; POWERPC64-AIX-NEXT: clrldi 4, 4, 32 +; POWERPC64-AIX-NEXT: addi 4, 4, 1 ; POWERPC64-AIX-NEXT: std 29, -24(1) # 8-byte Folded Spill ; POWERPC64-AIX-NEXT: std 30, -16(1) # 8-byte Folded Spill ; POWERPC64-AIX-NEXT: std 31, -8(1) # 8-byte Folded Spill -; POWERPC64-AIX-NEXT: blt 0, L..BB0_3 -; POWERPC64-AIX-NEXT: # %bb.1: # %for.body.preheader -; POWERPC64-AIX-NEXT: addi 4, 4, -1 -; POWERPC64-AIX-NEXT: clrldi 4, 4, 32 -; POWERPC64-AIX-NEXT: addi 4, 4, 1 ; POWERPC64-AIX-NEXT: mtctr 4 ; POWERPC64-AIX-NEXT: li 4, 0 ; POWERPC64-AIX-NEXT: .align 4 @@ -156,10 +157,7 @@ ; POWERPC64-AIX-NEXT: add 4, 3, 4 ; POWERPC64-AIX-NEXT: #NO_APP ; POWERPC64-AIX-NEXT: bdnz L..BB0_2 -; POWERPC64-AIX-NEXT: b L..BB0_4 -; POWERPC64-AIX-NEXT: L..BB0_3: -; POWERPC64-AIX-NEXT: li 4, 0 -; POWERPC64-AIX-NEXT: L..BB0_4: # %for.cond.cleanup +; POWERPC64-AIX-NEXT: # %bb.3: ; POWERPC64-AIX-NEXT: ld 31, -8(1) # 8-byte Folded Reload ; POWERPC64-AIX-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; POWERPC64-AIX-NEXT: ld 29, -24(1) # 8-byte Folded Reload @@ -180,6 +178,10 @@ ; POWERPC64-AIX-NEXT: ld 15, -136(1) # 8-byte Folded Reload ; POWERPC64-AIX-NEXT: ld 14, -144(1) # 8-byte Folded Reload ; POWERPC64-AIX-NEXT: blr +; POWERPC64-AIX-NEXT: L..BB0_4: +; POWERPC64-AIX-NEXT: li 4, 0 +; POWERPC64-AIX-NEXT: extsw 3, 4 +; POWERPC64-AIX-NEXT: blr entry: 
%cmp5 = icmp sgt i32 %lim, 0 br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/PowerPC/shrink-wrap.mir b/llvm/test/CodeGen/PowerPC/shrink-wrap.mir --- a/llvm/test/CodeGen/PowerPC/shrink-wrap.mir +++ b/llvm/test/CodeGen/PowerPC/shrink-wrap.mir @@ -48,42 +48,7 @@ ... --- name: shrinkwrapme -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false tracksRegLiveness: true -hasWinCFI: false -registers: [] -liveins: - - { reg: '$x3', virtual-reg: '' } - - { reg: '$x4', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: [] -callSites: [] -constants: [] -machineFunctionInfo: {} body: | ; CHECK-LABEL: name: shrinkwrapme ; CHECK: bb.0.entry: @@ -117,11 +82,17 @@ ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4.for.body: - ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.3(0x04000000) + ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.5(0x04000000) ; CHECK-NEXT: liveins: $r4, $x3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: INLINEASM &"add $0, $1, $2", 0 /* attdialect */, 131082 /* regdef:GPRC */, def renamable $r4, 131081 /* reguse:GPRC */, renamable $r3, 131081 /* reguse:GPRC */, killed renamable $r4, 12 /* clobber */, implicit-def dead early-clobber $r14, 12 /* clobber */, implicit-def dead early-clobber $r15, 12 /* clobber */, implicit-def dead early-clobber $r16, 12 /* clobber */, implicit-def dead early-clobber $r17, 12 /* clobber */, implicit-def dead early-clobber $r18, 12 /* clobber */, implicit-def 
dead early-clobber $r19, 12 /* clobber */, implicit-def dead early-clobber $r20, 12 /* clobber */, implicit-def dead early-clobber $r21, 12 /* clobber */, implicit-def dead early-clobber $r22, 12 /* clobber */, implicit-def dead early-clobber $r23, 12 /* clobber */, implicit-def dead early-clobber $r24, 12 /* clobber */, implicit-def dead early-clobber $r25, 12 /* clobber */, implicit-def dead early-clobber $r26, 12 /* clobber */, implicit-def dead early-clobber $r27, 12 /* clobber */, implicit-def dead early-clobber $r28, 12 /* clobber */, implicit-def dead early-clobber $r29, 12 /* clobber */, implicit-def dead early-clobber $r30, 12 /* clobber */, implicit-def dead early-clobber $r31 ; CHECK-NEXT: BDNZ8 %bb.4, implicit-def dead $ctr8, implicit $ctr8 + ; CHECK-NEXT: B %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $r4 + ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: B %bb.3 bb.0.entry: successors: %bb.2(0x50000000), %bb.1(0x30000000) diff --git a/llvm/test/CodeGen/RISCV/aext-to-sext.ll b/llvm/test/CodeGen/RISCV/aext-to-sext.ll --- a/llvm/test/CodeGen/RISCV/aext-to-sext.ll +++ b/llvm/test/CodeGen/RISCV/aext-to-sext.ll @@ -11,21 +11,22 @@ define void @quux(i32 signext %arg, i32 signext %arg1) nounwind { ; RV64I-LABEL: quux: ; RV64I: # %bb.0: # %bb +; RV64I-NEXT: beq a0, a1, .LBB0_4 +; RV64I-NEXT: # %bb.1: # %bb2.preheader ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: beq a0, a1, .LBB0_3 -; RV64I-NEXT: # %bb.1: # %bb2.preheader ; RV64I-NEXT: subw s0, a1, a0 ; RV64I-NEXT: .LBB0_2: # %bb2 ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: call hoge@plt ; RV64I-NEXT: addiw s0, s0, -1 ; RV64I-NEXT: bnez s0, .LBB0_2 -; RV64I-NEXT: .LBB0_3: # %bb6 +; RV64I-NEXT: # %bb.3: ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 +; 
RV64I-NEXT: .LBB0_4: # %bb6 ; RV64I-NEXT: ret bb: %tmp = icmp eq i32 %arg, %arg1 diff --git a/llvm/test/CodeGen/RISCV/fli-licm.ll b/llvm/test/CodeGen/RISCV/fli-licm.ll --- a/llvm/test/CodeGen/RISCV/fli-licm.ll +++ b/llvm/test/CodeGen/RISCV/fli-licm.ll @@ -12,11 +12,11 @@ define void @process_nodes(ptr %0) nounwind { ; RV32-LABEL: process_nodes: ; RV32: # %bb.0: # %entry +; RV32-NEXT: beqz a0, .LBB0_4 +; RV32-NEXT: # %bb.1: # %loop.preheader ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: beqz a0, .LBB0_3 -; RV32-NEXT: # %bb.1: # %loop.preheader ; RV32-NEXT: mv s0, a0 ; RV32-NEXT: .LBB0_2: # %loop ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 @@ -25,19 +25,20 @@ ; RV32-NEXT: call do_it@plt ; RV32-NEXT: lw s0, 0(s0) ; RV32-NEXT: bnez s0, .LBB0_2 -; RV32-NEXT: .LBB0_3: # %exit +; RV32-NEXT: # %bb.3: ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: .LBB0_4: # %exit ; RV32-NEXT: ret ; ; RV64-LABEL: process_nodes: ; RV64: # %bb.0: # %entry +; RV64-NEXT: beqz a0, .LBB0_4 +; RV64-NEXT: # %bb.1: # %loop.preheader ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64-NEXT: beqz a0, .LBB0_3 -; RV64-NEXT: # %bb.1: # %loop.preheader ; RV64-NEXT: mv s0, a0 ; RV64-NEXT: .LBB0_2: # %loop ; RV64-NEXT: # =>This Inner Loop Header: Depth=1 @@ -46,10 +47,11 @@ ; RV64-NEXT: call do_it@plt ; RV64-NEXT: ld s0, 0(s0) ; RV64-NEXT: bnez s0, .LBB0_2 -; RV64-NEXT: .LBB0_3: # %exit +; RV64-NEXT: # %bb.3: ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: .LBB0_4: # %exit ; RV64-NEXT: ret entry: %1 = icmp eq ptr %0, null diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll 
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll @@ -4,11 +4,13 @@ define i32 @test(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) { ; CHECK-LABEL: test: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB0_1: @ %for.body.preheader ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: blt .LBB0_4 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: mov lr, r0 ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: .LBB0_2: @ %for.body @@ -21,10 +23,7 @@ ; CHECK-NEXT: @NO_APP ; CHECK-NEXT: add r0, r3 ; CHECK-NEXT: bne .LBB0_2 -; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup -; CHECK-NEXT: pop {r7, pc} -; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: @ %bb.3: ; CHECK-NEXT: pop {r7, pc} entry: %cmp9 = icmp sgt i32 %n, 0 @@ -51,11 +50,13 @@ define i32 @testlr(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) { ; CHECK-LABEL: testlr: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB1_1: @ %for.body.preheader ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: blt .LBB1_4 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: mov r3, r0 ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: .LBB1_2: @ %for.body @@ -68,10 +69,7 @@ ; CHECK-NEXT: @NO_APP ; CHECK-NEXT: add r0, r4 ; CHECK-NEXT: bne .LBB1_2 -; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, pc} -; CHECK-NEXT: .LBB1_4: -; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: @ %bb.3: ; CHECK-NEXT: pop {r4, pc} entry: %cmp9 = icmp sgt i32 %n, 0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll +++ 
b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll @@ -4,11 +4,12 @@ define void @test_memcpy(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i32 %m) { ; CHECK-LABEL: test_memcpy: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB0_1: @ %for.body.preheader ; CHECK-NEXT: .save {r4, r5, r6, r7, lr} ; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: blt .LBB0_5 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: lsl.w r12, r3, #2 ; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: b .LBB0_2 @@ -31,8 +32,9 @@ ; CHECK-NEXT: vstrb.8 q0, [r5], #16 ; CHECK-NEXT: letp lr, .LBB0_4 ; CHECK-NEXT: b .LBB0_3 -; CHECK-NEXT: .LBB0_5: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: .LBB0_5: +; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr} +; CHECK-NEXT: bx lr entry: %cmp8 = icmp sgt i32 %n, 0 br i1 %cmp8, label %for.body, label %for.cond.cleanup @@ -55,12 +57,12 @@ define void @test_memset(ptr nocapture %x, i32 %n, i32 %m) { ; CHECK-LABEL: test_memset: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: cmp r1, #1 ; CHECK-NEXT: it lt -; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: bxlt lr ; CHECK-NEXT: .LBB1_1: +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: b .LBB1_2 ; CHECK-NEXT: .LBB1_2: @ %for.body @@ -80,8 +82,9 @@ ; CHECK-NEXT: vstrb.8 q0, [r12], #16 ; CHECK-NEXT: letp lr, .LBB1_4 ; CHECK-NEXT: b .LBB1_3 -; CHECK-NEXT: .LBB1_5: @ %for.cond.cleanup -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: .LBB1_5: +; CHECK-NEXT: pop.w {r7, lr} +; CHECK-NEXT: bx lr entry: %cmp5 = icmp sgt i32 %n, 0 br i1 %cmp5, label %for.body, label %for.cond.cleanup @@ -102,13 +105,14 @@ define void @test_memmove(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i32 %m) { ; CHECK-LABEL: test_memmove: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; 
CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB2_1: @ %for.body.preheader ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: blt .LBB2_3 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: mov r8, r3 ; CHECK-NEXT: mov r5, r2 ; CHECK-NEXT: mov r9, r1 @@ -124,9 +128,10 @@ ; CHECK-NEXT: add r6, r4 ; CHECK-NEXT: subs r5, #1 ; CHECK-NEXT: bne .LBB2_2 -; CHECK-NEXT: .LBB2_3: @ %for.cond.cleanup +; CHECK-NEXT: @ %bb.3: ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: bx lr entry: %cmp8 = icmp sgt i32 %n, 0 br i1 %cmp8, label %for.body, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll @@ -4,10 +4,11 @@ define arm_aapcs_vfpcc void @float_float_mul(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) { ; CHECK-LABEL: float_float_mul: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: beq .LBB0_10 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-NEXT: it eq +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB0_1: @ %for.body.preheader +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB0_3 ; CHECK-NEXT: @ %bb.2: @@ -80,8 +81,9 @@ ; CHECK-NEXT: vmul.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r5, #12] ; CHECK-NEXT: bne .LBB0_9 -; CHECK-NEXT: .LBB0_10: @ %for.cond.cleanup -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: .LBB0_10: +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: bx lr ; CHECK-NEXT: .LBB0_11: @ %vector.ph ; CHECK-NEXT: bic r12, r3, #3 ; CHECK-NEXT: movs 
r6, #1 @@ -215,10 +217,11 @@ define arm_aapcs_vfpcc void @float_float_add(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) { ; CHECK-LABEL: float_float_add: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: beq .LBB1_10 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-NEXT: it eq +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB1_1: @ %for.body.preheader +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB1_3 ; CHECK-NEXT: @ %bb.2: @@ -291,8 +294,9 @@ ; CHECK-NEXT: vadd.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r5, #12] ; CHECK-NEXT: bne .LBB1_9 -; CHECK-NEXT: .LBB1_10: @ %for.cond.cleanup -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: .LBB1_10: +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: bx lr ; CHECK-NEXT: .LBB1_11: @ %vector.ph ; CHECK-NEXT: bic r12, r3, #3 ; CHECK-NEXT: movs r6, #1 @@ -426,10 +430,11 @@ define arm_aapcs_vfpcc void @float_float_sub(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) { ; CHECK-LABEL: float_float_sub: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: beq .LBB2_10 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-NEXT: it eq +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB2_1: @ %for.body.preheader +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB2_3 ; CHECK-NEXT: @ %bb.2: @@ -502,8 +507,9 @@ ; CHECK-NEXT: vsub.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r5, #12] ; CHECK-NEXT: bne .LBB2_9 -; CHECK-NEXT: .LBB2_10: @ %for.cond.cleanup -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: .LBB2_10: +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: bx lr ; CHECK-NEXT: .LBB2_11: @ %vector.ph ; CHECK-NEXT: bic r12, r3, #3 ; CHECK-NEXT: movs r6, #1 @@ -637,10 +643,11 @@ define arm_aapcs_vfpcc void @float_int_mul(ptr nocapture 
readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) { ; CHECK-LABEL: float_int_mul: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: beq.w .LBB3_13 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-NEXT: it eq +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB3_1: @ %for.body.preheader +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bls .LBB3_6 ; CHECK-NEXT: @ %bb.2: @ %vector.memcheck @@ -729,8 +736,9 @@ ; CHECK-NEXT: vmul.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r6, #12] ; CHECK-NEXT: bne .LBB3_12 -; CHECK-NEXT: .LBB3_13: @ %for.cond.cleanup -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: .LBB3_13: +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: bx lr entry: %cmp8 = icmp eq i32 %N, 0 br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll @@ -411,10 +411,12 @@ define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr { ; CHECK-LABEL: two_loops_mul_add_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push {r4, r5, r6, r7, lr} ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: beq .LBB6_8 -; CHECK-NEXT: @ %bb.1: @ %vector.ph +; CHECK-NEXT: itt eq +; CHECK-NEXT: moveq r0, #0 +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB6_1: @ %vector.ph +; CHECK-NEXT: push {r4, r5, r6, r7, lr} ; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: vmov.i32 q1, #0x0 ; CHECK-NEXT: bic r3, r3, #3 @@ -461,12 +463,10 @@ ; CHECK-NEXT: @ %bb.6: @ %middle.block44 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: vaddv.u32 r12, q0 -; CHECK-NEXT: .LBB6_7: @ %for.cond.cleanup7 +; CHECK-NEXT: .LBB6_7: +; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr} ; 
CHECK-NEXT: mov r0, r12 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} -; CHECK-NEXT: .LBB6_8: -; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: bx lr entry: %cmp35 = icmp eq i32 %N, 0 br i1 %cmp35, label %for.cond.cleanup7, label %vector.ph diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll @@ -4,10 +4,11 @@ define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noalias nocapture %data, ptr noalias nocapture %dst, i32 %n) { ; CHECK-LABEL: test: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: cmp r3, #1 -; CHECK-NEXT: blt .LBB0_7 -; CHECK-NEXT: @ %bb.1: @ %for.cond1.preheader.us.preheader +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB0_1: @ %for.cond1.preheader.us.preheader +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: mov r8, r3 ; CHECK-NEXT: lsl.w r12, r3, #1 ; CHECK-NEXT: movs r3, #0 @@ -47,8 +48,9 @@ ; CHECK-NEXT: add r4, r12 ; CHECK-NEXT: cmp r3, r8 ; CHECK-NEXT: bne .LBB0_2 -; CHECK-NEXT: .LBB0_7: @ %for.cond.cleanup -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: @ %bb.7: +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: bx lr entry: %cmp252 = icmp sgt i32 %n, 0 br i1 %cmp252, label %for.cond1.preheader.us, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll @@ -5,17 +5,19 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(ptr noalias nocapture %phwTargetBase, i16 signext %iTargetStride, ptr noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) { ; 
CHECK-LABEL: __arm_2d_impl_rgb16_colour_filling_with_alpha: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: ldrsh.w r12, [r2, #2] +; CHECK-NEXT: cmp.w r12, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB0_1: @ %for.cond3.preheader.lr.ph ; CHECK-NEXT: push {r4, r5, r6, r7, lr} ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: sub sp, #64 -; CHECK-NEXT: ldrsh.w r12, [r2, #2] -; CHECK-NEXT: cmp.w r12, #1 -; CHECK-NEXT: itt ge -; CHECK-NEXT: ldrshge.w r7, [r2] -; CHECK-NEXT: cmpge r7, #1 -; CHECK-NEXT: blt.w .LBB0_5 -; CHECK-NEXT: @ %bb.1: @ %for.cond3.preheader.us.preheader +; CHECK-NEXT: ldrsh.w r7, [r2] +; CHECK-NEXT: cmp r7, #1 +; CHECK-NEXT: blt.w .LBB0_6 +; CHECK-NEXT: @ %bb.2: @ %for.cond3.preheader.us.preheader ; CHECK-NEXT: movs r2, #252 ; CHECK-NEXT: ldr r4, [sp, #152] ; CHECK-NEXT: and.w r6, r2, r3, lsr #3 @@ -46,14 +48,14 @@ ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill ; CHECK-NEXT: vstrw.32 q2, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: vstrw.32 q3, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: .LBB0_2: @ %vector.ph +; CHECK-NEXT: .LBB0_3: @ %vector.ph ; CHECK-NEXT: @ =>This Loop Header: Depth=1 -; CHECK-NEXT: @ Child Loop BB0_3 Depth 2 +; CHECK-NEXT: @ Child Loop BB0_4 Depth 2 ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r6, r7 ; CHECK-NEXT: dls lr, r3 -; CHECK-NEXT: .LBB0_3: @ %vector.body -; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1 +; CHECK-NEXT: .LBB0_4: @ %vector.body +; CHECK-NEXT: @ Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: vctp.16 r6 ; CHECK-NEXT: subs r6, #8 @@ -89,18 +91,19 @@ ; CHECK-NEXT: vorr q0, q1, q0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.16 q0, [r5], #16 -; CHECK-NEXT: le lr, .LBB0_3 -; CHECK-NEXT: @ %bb.4: @ %for.cond3.for.cond.cleanup7_crit_edge.us -; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: le lr, .LBB0_4 +; CHECK-NEXT: @ %bb.5: @ %for.cond3.for.cond.cleanup7_crit_edge.us +; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1 
; CHECK-NEXT: adds r4, #1 ; CHECK-NEXT: add.w r0, r0, r1, lsl #1 ; CHECK-NEXT: cmp r4, r12 -; CHECK-NEXT: bne .LBB0_2 -; CHECK-NEXT: .LBB0_5: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB0_3 +; CHECK-NEXT: .LBB0_6: ; CHECK-NEXT: add sp, #64 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr} +; CHECK-NEXT: bx lr entry: %iHeight = getelementptr inbounds %struct.arm_2d_size_t, ptr %ptCopySize, i32 0, i32 1 %0 = load i16, ptr %iHeight, align 2 @@ -184,18 +187,19 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(ptr noalias nocapture %phwTargetBase, i16 signext %iTargetStride, ptr noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) "target-cpu"="cortex-m55" { ; CHECK-LABEL: __arm_2d_impl_rgb16_colour_filling_with_alpha_sched: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: sub sp, #80 ; CHECK-NEXT: ldrsh.w r12, [r2, #2] ; CHECK-NEXT: cmp.w r12, #1 -; CHECK-NEXT: blt.w .LBB1_6 +; CHECK-NEXT: blt.w .LBB1_7 ; CHECK-NEXT: @ %bb.1: @ %for.cond3.preheader.lr.ph ; CHECK-NEXT: ldrsh.w r2, [r2] ; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: blt .LBB1_6 -; CHECK-NEXT: @ %bb.2: @ %for.cond3.preheader.us.preheader +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB1_2: @ %for.cond3.preheader.us.preheader +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: sub sp, #80 ; CHECK-NEXT: ldr r7, [sp, #168] ; CHECK-NEXT: movs r5, #120 ; CHECK-NEXT: lsls r6, r3, #3 @@ -265,11 +269,13 @@ ; CHECK-NEXT: adds r4, #1 ; CHECK-NEXT: cmp r4, r12 ; CHECK-NEXT: bne .LBB1_3 -; CHECK-NEXT: .LBB1_6: @ %for.cond.cleanup +; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: add sp, #80 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: 
add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr} +; CHECK-NEXT: .LBB1_7: @ %for.cond.cleanup +; CHECK-NEXT: bx lr entry: %iHeight = getelementptr inbounds %struct.arm_2d_size_t, ptr %ptCopySize, i32 0, i32 1 %0 = load i16, ptr %iHeight, align 2 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll @@ -53,10 +53,12 @@ define void @nested(ptr nocapture readonly %x, ptr nocapture readnone %y, ptr nocapture %z, i32 %m, i32 %n) { ; CHECK-LABEL: nested: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: it eq +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB1_1: @ %for.body.preheader ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: cbz r3, .LBB1_8 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: ldr.w r12, [sp, #24] ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: b .LBB1_4 @@ -91,8 +93,9 @@ ; CHECK-NEXT: sub.w r12, r12, r5 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: b .LBB1_3 -; CHECK-NEXT: .LBB1_8: @ %for.cond.cleanup -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: .LBB1_8: +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: bx lr entry: %cmp20.not = icmp eq i32 %m, 0 br i1 %cmp20.not, label %for.cond.cleanup, label %for.body diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -981,6 +981,13 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) { ; CHECK-LABEL: fir: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r3, #8 +; CHECK-NEXT: blo.w .LBB16_13 +; CHECK-NEXT: @ %bb.1: @ %if.then +; CHECK-NEXT: lsrs.w r12, r3, #2 +; 
CHECK-NEXT: it eq +; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB16_2: @ %while.body.lr.ph ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 @@ -989,12 +996,6 @@ ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: .pad #32 ; CHECK-NEXT: sub sp, #32 -; CHECK-NEXT: cmp r3, #8 -; CHECK-NEXT: blo.w .LBB16_12 -; CHECK-NEXT: @ %bb.1: @ %if.then -; CHECK-NEXT: lsrs.w r12, r3, #2 -; CHECK-NEXT: beq.w .LBB16_12 -; CHECK-NEXT: @ %bb.2: @ %while.body.lr.ph ; CHECK-NEXT: ldrh r6, [r0] ; CHECK-NEXT: movs r5, #1 ; CHECK-NEXT: ldrd r4, r10, [r0, #4] @@ -1106,11 +1107,13 @@ ; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: add.w r4, r4, r0, lsl #2 ; CHECK-NEXT: b .LBB16_4 -; CHECK-NEXT: .LBB16_12: @ %if.end +; CHECK-NEXT: .LBB16_12: ; CHECK-NEXT: add sp, #32 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .LBB16_13: @ %if.end +; CHECK-NEXT: bx lr entry: %pState1 = getelementptr inbounds %struct.arm_fir_instance_f32, ptr %S, i32 0, i32 1 %i = load ptr, ptr %pState1, align 4 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll --- a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll @@ -290,12 +290,12 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) { ; CHECK-LABEL: gather_inc_v4i32_simple: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt -; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: bxlt lr ; CHECK-NEXT: .LBB8_1: @ %vector.ph.preheader +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: bic r12, r2, #3 ; CHECK-NEXT: movs r3, #1 
; CHECK-NEXT: sub.w lr, r12, #4 @@ -319,8 +319,9 @@ ; CHECK-NEXT: @ in Loop: Header=BB8_2 Depth=1 ; CHECK-NEXT: cmp r12, r2 ; CHECK-NEXT: bne .LBB8_2 -; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: @ %bb.5: +; CHECK-NEXT: pop.w {r4, lr} +; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: .LCPI8_0: @@ -359,13 +360,14 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_complex(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) { ; CHECK-LABEL: gather_inc_v4i32_complex: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB9_1: @ %vector.ph.preheader ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: blt .LBB9_5 -; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader ; CHECK-NEXT: bic r12, r2, #3 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: sub.w lr, r12, #4 @@ -401,9 +403,10 @@ ; CHECK-NEXT: @ in Loop: Header=BB9_2 Depth=1 ; CHECK-NEXT: cmp r12, r2 ; CHECK-NEXT: bne .LBB9_2 -; CHECK-NEXT: .LBB9_5: @ %for.cond.cleanup +; CHECK-NEXT: @ %bb.5: ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: pop.w {r4, r5, r7, lr} +; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: .LCPI9_0: @@ -461,12 +464,12 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_large(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) { ; CHECK-LABEL: gather_inc_v4i32_large: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt -; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: bxlt lr ; CHECK-NEXT: .LBB10_1: @ %vector.ph.preheader +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: bic r12, 
r2, #3 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: sub.w lr, r12, #4 @@ -490,8 +493,9 @@ ; CHECK-NEXT: @ in Loop: Header=BB10_2 Depth=1 ; CHECK-NEXT: cmp r12, r2 ; CHECK-NEXT: bne .LBB10_2 -; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: @ %bb.5: +; CHECK-NEXT: pop.w {r4, lr} +; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: .LCPI10_0: diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll --- a/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll @@ -4,12 +4,12 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) { ; CHECK-LABEL: gather_inc_v4i32_simple: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt -; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: bxlt lr ; CHECK-NEXT: .LBB0_1: @ %vector.ph.preheader +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: bic r12, r2, #3 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: sub.w lr, r12, #4 @@ -33,8 +33,9 @@ ; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: cmp r12, r2 ; CHECK-NEXT: bne .LBB0_2 -; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: @ %bb.5: +; CHECK-NEXT: pop.w {r4, lr} +; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: .LCPI0_0: diff --git a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll --- a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll +++ b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll @@ -211,12 +211,12 @@ define void @test11(ptr nocapture %x, ptr nocapture %y, i32 %n) { ; CHECK-LABEL: test11: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: cmp.w r2, #-1 ; CHECK-NEXT: it gt -; CHECK-NEXT: popgt {r4, pc} +; 
CHECK-NEXT: bxgt lr ; CHECK-NEXT: .LBB10_1: @ %prehead +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: mov r12, r1 ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: wlstp.8 lr, r2, .LBB10_3 @@ -230,8 +230,9 @@ ; CHECK-NEXT: subs r2, #2 ; CHECK-NEXT: strb r3, [r1], #1 ; CHECK-NEXT: bne .LBB10_3 -; CHECK-NEXT: @ %bb.4: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: @ %bb.4: +; CHECK-NEXT: pop.w {r4, lr} +; CHECK-NEXT: bx lr entry: %cmp6 = icmp slt i32 %n, 0 br i1 %cmp6, label %prehead, label %for.cond.cleanup @@ -440,12 +441,12 @@ define void @multilooped_exit(i32 %b) { ; CHECK-LABEL: multilooped_exit: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: cmp r0, #1 ; CHECK-NEXT: it lt -; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: bxlt lr ; CHECK-NEXT: .LBB18_1: @ %loop.preheader +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: mov.w r4, #-1 ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: b .LBB18_3 @@ -498,8 +499,9 @@ ; CHECK-NEXT: vstrb.8 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB18_11 ; CHECK-NEXT: b .LBB18_2 -; CHECK-NEXT: .LBB18_12: @ %exit -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: .LBB18_12: +; CHECK-NEXT: pop.w {r4, lr} +; CHECK-NEXT: bx lr entry: %cmp8 = icmp sgt i32 %b, 0 br i1 %cmp8, label %loop, label %exit diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll --- a/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll @@ -6,13 +6,14 @@ define void @DCT_mve1(ptr nocapture readonly %S, ptr nocapture readonly %pIn, ptr nocapture %pOut) { ; CHECK-LABEL: DCT_mve1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} ; CHECK-NEXT: ldr r3, [r0, #4] ; CHECK-NEXT: sub.w r12, r3, #1 ; CHECK-NEXT: cmp.w r12, #2 -; CHECK-NEXT: blo .LBB0_5 -; CHECK-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-NEXT: it lo +; 
CHECK-NEXT: bxlo lr +; CHECK-NEXT: .LBB0_1: @ %for.body.preheader +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} ; CHECK-NEXT: ldr r5, [r0, #8] ; CHECK-NEXT: ldr r3, [r0] ; CHECK-NEXT: add.w r3, r3, r5, lsl #2 @@ -43,8 +44,9 @@ ; CHECK-NEXT: vadd.f32 s0, s0, s2 ; CHECK-NEXT: vstr s0, [r7] ; CHECK-NEXT: bne .LBB0_2 -; CHECK-NEXT: .LBB0_5: @ %for.cond.cleanup -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-NEXT: @ %bb.5: +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: bx lr entry: %NumInputs = getelementptr inbounds %struct.DCT_InstanceTypeDef, ptr %S, i32 0, i32 2 %i = load i32, ptr %NumInputs, align 4 diff --git a/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll b/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll --- a/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll +++ b/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll @@ -127,15 +127,16 @@ define arm_aapcs_vfpcc void @scatter_inc_v4i32_complex(<4 x i32> %data1, <4 x i32> %data2, <4 x i32> %data3, ptr %dst, i32 %n) { ; CHECK-LABEL: scatter_inc_v4i32_complex: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r1, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB3_1: @ %vector.ph.preheader ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: .pad #16 ; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: cmp r1, #1 -; CHECK-NEXT: blt .LBB3_5 -; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader ; CHECK-NEXT: adr r4, .LCPI3_2 ; CHECK-NEXT: bic r2, r1, #3 ; CHECK-NEXT: vldrw.u32 q3, [r4] @@ -168,10 +169,11 @@ ; CHECK-NEXT: @ in Loop: Header=BB3_2 Depth=1 ; CHECK-NEXT: cmp r2, r1 ; CHECK-NEXT: bne .LBB3_2 -; CHECK-NEXT: .LBB3_5: @ %for.cond.cleanup +; CHECK-NEXT: @ %bb.5: ; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: pop.w {r4, 
lr} +; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: .LCPI3_0: diff --git a/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll b/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll --- a/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll +++ b/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll @@ -58,11 +58,12 @@ define arm_aapcs_vfpcc void @start11(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr noalias nocapture %z, float %a, i32 %n) { ; CHECK-LABEL: start11: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r3, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB1_1: @ %vector.ph ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: cmp r3, #1 -; CHECK-NEXT: blt .LBB1_3 -; CHECK-NEXT: @ %bb.1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: adds r4, r3, #3 ; CHECK-NEXT: adr r5, .LCPI1_0 @@ -85,8 +86,9 @@ ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q3, [r2], #16 ; CHECK-NEXT: bne .LBB1_2 -; CHECK-NEXT: .LBB1_3: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: pop.w {r4, r5, r7, lr} +; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.4: ; CHECK-NEXT: .LCPI1_0: diff --git a/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll b/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll --- a/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll @@ -4,11 +4,13 @@ define arm_aapcs_vfpcc void @test32(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, ptr nocapture %z, i32 %n) { ; CHECK-LABEL: test32: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r3, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB0_1: @ %vector.body.preheader ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: cmp r3, #1 -; CHECK-NEXT: blt .LBB0_2 -; CHECK-NEXT: .LBB0_1: @ %vector.body +; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop 
Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 @@ -26,9 +28,10 @@ ; CHECK-NEXT: lsrl r4, r5, #31 ; CHECK-NEXT: vmov q2[3], q2[1], r4, r12 ; CHECK-NEXT: vstrb.8 q2, [r2], #16 -; CHECK-NEXT: bne .LBB0_1 -; CHECK-NEXT: .LBB0_2: @ %for.cond.cleanup -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: bne .LBB0_2 +; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: pop.w {r4, r5, r7, lr} +; CHECK-NEXT: bx lr entry: %0 = and i32 %n, 3 %cmp = icmp eq i32 %0, 0 diff --git a/llvm/test/CodeGen/X86/fold-call-3.ll b/llvm/test/CodeGen/X86/fold-call-3.ll --- a/llvm/test/CodeGen/X86/fold-call-3.ll +++ b/llvm/test/CodeGen/X86/fold-call-3.ll @@ -13,12 +13,12 @@ define void @_Z25RawPointerPerformanceTestPvRN5clang6ActionE(ptr %Val, ptr %Actions) nounwind { ; CHECK-LABEL: _Z25RawPointerPerformanceTestPvRN5clang6ActionE: ; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: cmpl $0, _NumTrials(%rip) +; CHECK-NEXT: je LBB0_4 +; CHECK-NEXT: ## %bb.1: ## %bb.nph ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: cmpl $0, _NumTrials(%rip) -; CHECK-NEXT: je LBB0_3 -; CHECK-NEXT: ## %bb.1: ## %bb.nph ; CHECK-NEXT: movq %rsi, %rbx ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: xorl %ebp, %ebp @@ -34,20 +34,21 @@ ; CHECK-NEXT: incl %ebp ; CHECK-NEXT: cmpl _NumTrials(%rip), %ebp ; CHECK-NEXT: jb LBB0_2 -; CHECK-NEXT: LBB0_3: ## %return +; CHECK-NEXT: ## %bb.3: ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %rbp +; CHECK-NEXT: LBB0_4: ## %return ; CHECK-NEXT: retq ; ; pre-RA-LABEL: _Z25RawPointerPerformanceTestPvRN5clang6ActionE: ; pre-RA: ## %bb.0: ## %entry +; pre-RA-NEXT: cmpl $0, _NumTrials(%rip) +; pre-RA-NEXT: je LBB0_4 +; pre-RA-NEXT: ## %bb.1: ## %bb.nph ; pre-RA-NEXT: pushq %rbp ; pre-RA-NEXT: pushq %rbx ; pre-RA-NEXT: subq $24, %rsp -; pre-RA-NEXT: cmpl $0, _NumTrials(%rip) -; pre-RA-NEXT: je LBB0_3 -; pre-RA-NEXT: ## %bb.1: ## %bb.nph ; pre-RA-NEXT: movq %rsi, %rbx ; pre-RA-NEXT: movq %rdi, %rax ; 
pre-RA-NEXT: xorl %ebp, %ebp @@ -63,10 +64,11 @@ ; pre-RA-NEXT: movq %rdx, {{[0-9]+}}(%rsp) ; pre-RA-NEXT: cmpl _NumTrials(%rip), %ebp ; pre-RA-NEXT: jb LBB0_2 -; pre-RA-NEXT: LBB0_3: ## %return +; pre-RA-NEXT: ## %bb.3: ; pre-RA-NEXT: addq $24, %rsp ; pre-RA-NEXT: popq %rbx ; pre-RA-NEXT: popq %rbp +; pre-RA-NEXT: LBB0_4: ## %return ; pre-RA-NEXT: retq entry: %i = alloca %"struct.clang::ActionBase::ActionResult<0u>", align 8 diff --git a/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll b/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll --- a/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll +++ b/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll @@ -9,12 +9,14 @@ define void @foo(i32 %N) nounwind { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: js .LBB0_1 +; CHECK-NEXT: # %bb.4: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_1: # %bb.preheader ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: pushq %rax -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jns .LBB0_3 -; CHECK-NEXT: # %bb.1: # %bb.preheader ; CHECK-NEXT: movl %edi, %ebx ; CHECK-NEXT: xorl %ebp, %ebp ; CHECK-NEXT: .p2align 4, 0x90 @@ -26,7 +28,7 @@ ; CHECK-NEXT: decl %ebp ; CHECK-NEXT: cmpl %ebp, %ebx ; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: .LBB0_3: # %return +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %rbp diff --git a/llvm/test/CodeGen/X86/pr44412.ll b/llvm/test/CodeGen/X86/pr44412.ll --- a/llvm/test/CodeGen/X86/pr44412.ll +++ b/llvm/test/CodeGen/X86/pr44412.ll @@ -4,10 +4,10 @@ define void @bar(i32 %0, i32 %1) nounwind { ; CHECK-LABEL: bar: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: je .LBB0_3 +; CHECK-NEXT: je .LBB0_4 ; CHECK-NEXT: # %bb.1: # %.preheader +; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: movl %edi, %ebx ; CHECK-NEXT: decl %ebx ; CHECK-NEXT: .p2align 4, 0x90 @@ -16,8 +16,9 @@ ; CHECK-NEXT: callq foo@PLT ; CHECK-NEXT: addl 
$-1, %ebx ; CHECK-NEXT: jb .LBB0_2 -; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .LBB0_4: ; CHECK-NEXT: retq %3 = icmp eq i32 %0, 0 br i1 %3, label %8, label %4 @@ -36,10 +37,10 @@ define void @baz(i32 %0, i32 %1) nounwind { ; CHECK-LABEL: baz: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: je .LBB1_3 +; CHECK-NEXT: je .LBB1_4 ; CHECK-NEXT: # %bb.1: # %.preheader +; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: movl %edi, %ebx ; CHECK-NEXT: decl %ebx ; CHECK-NEXT: .p2align 4, 0x90 @@ -48,8 +49,9 @@ ; CHECK-NEXT: callq foo@PLT ; CHECK-NEXT: addl $-1, %ebx ; CHECK-NEXT: jae .LBB1_2 -; CHECK-NEXT: .LBB1_3: +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: retq %3 = icmp eq i32 %0, 0 br i1 %3, label %8, label %4 diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll --- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -639,40 +639,40 @@ define void @useLEA(ptr readonly %x) { ; ENABLE-LABEL: useLEA: ; ENABLE: ## %bb.0: ## %entry -; ENABLE-NEXT: pushq %rax -; ENABLE-NEXT: .cfi_def_cfa_offset 16 ; ENABLE-NEXT: testq %rdi, %rdi -; ENABLE-NEXT: je LBB8_7 +; ENABLE-NEXT: je LBB8_9 ; ENABLE-NEXT: ## %bb.1: ## %if.end ; ENABLE-NEXT: cmpw $66, (%rdi) -; ENABLE-NEXT: jne LBB8_7 +; ENABLE-NEXT: jne LBB8_9 ; ENABLE-NEXT: ## %bb.2: ## %lor.lhs.false +; ENABLE-NEXT: pushq %rax +; ENABLE-NEXT: .cfi_def_cfa_offset 16 ; ENABLE-NEXT: movq 8(%rdi), %rdi ; ENABLE-NEXT: movzwl (%rdi), %eax ; ENABLE-NEXT: leal -54(%rax), %ecx ; ENABLE-NEXT: cmpl $14, %ecx ; ENABLE-NEXT: ja LBB8_3 -; ENABLE-NEXT: ## %bb.8: ## %lor.lhs.false +; ENABLE-NEXT: ## %bb.7: ## %lor.lhs.false ; ENABLE-NEXT: movl $24599, %edx ## imm = 0x6017 ; ENABLE-NEXT: btl %ecx, %edx ; ENABLE-NEXT: jae LBB8_3 -; ENABLE-NEXT: LBB8_7: ## %cleanup -; ENABLE-NEXT: popq %rax +; ENABLE-NEXT: LBB8_8: +; ENABLE-NEXT: addq $8, 
%rsp +; ENABLE-NEXT: LBB8_9: ## %cleanup ; ENABLE-NEXT: retq ; ENABLE-NEXT: LBB8_3: ## %lor.lhs.false ; ENABLE-NEXT: cmpl $134, %eax -; ENABLE-NEXT: je LBB8_7 +; ENABLE-NEXT: je LBB8_8 ; ENABLE-NEXT: ## %bb.4: ## %lor.lhs.false ; ENABLE-NEXT: cmpl $140, %eax -; ENABLE-NEXT: je LBB8_7 +; ENABLE-NEXT: je LBB8_8 ; ENABLE-NEXT: ## %bb.5: ## %if.end.55 ; ENABLE-NEXT: callq _find_temp_slot_from_address ; ENABLE-NEXT: testq %rax, %rax -; ENABLE-NEXT: je LBB8_7 +; ENABLE-NEXT: je LBB8_8 ; ENABLE-NEXT: ## %bb.6: ## %if.then.60 ; ENABLE-NEXT: movb $1, 57(%rax) -; ENABLE-NEXT: popq %rax -; ENABLE-NEXT: retq +; ENABLE-NEXT: jmp LBB8_8 ; ; DISABLE-LABEL: useLEA: ; DISABLE: ## %bb.0: ## %entry diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll --- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll @@ -23,7 +23,7 @@ ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_5: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret entry: %cmp13 = icmp sgt i32 %c, 0 @@ -62,7 +62,7 @@ ; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: add x10, x1, #4 ; CHECK-NEXT: add x11, x2, #8 -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: .LBB1_2: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr w12, [x10, x8, lsl #2] @@ -142,7 +142,7 @@ ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB2_5: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret entry: %cmp13 = icmp sgt i32 %c, 0 diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll --- a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll @@ -182,12 +182,12 @@ define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* 
nocapture %res, i32 %x, i32 %y, i32 %z) nounwind { ; X64-LABEL: extrastride: ; X64: # %bb.0: # %entry -; X64-NEXT: pushq %rbx ; X64-NEXT: # kill: def $ecx killed $ecx def $rcx ; X64-NEXT: # kill: def $esi killed $esi def $rsi ; X64-NEXT: testl %r9d, %r9d -; X64-NEXT: je .LBB2_3 +; X64-NEXT: je .LBB2_4 ; X64-NEXT: # %bb.1: # %for.body.lr.ph +; X64-NEXT: pushq %rbx ; X64-NEXT: leal (%rsi,%rsi), %r10d ; X64-NEXT: leal (%rsi,%rsi,2), %r11d ; X64-NEXT: addl %esi, %ecx @@ -213,8 +213,9 @@ ; X64-NEXT: addq %r8, %rdx ; X64-NEXT: decl %r9d ; X64-NEXT: jne .LBB2_2 -; X64-NEXT: .LBB2_3: # %for.end +; X64-NEXT: # %bb.3: ; X64-NEXT: popq %rbx +; X64-NEXT: .LBB2_4: # %for.end ; X64-NEXT: retq ; ; X32-LABEL: extrastride: