diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -790,9 +790,7 @@ } // Ensure non-terminators don't follow terminators. - // Ignore predicated terminators formed by if conversion. - // FIXME: If conversion shouldn't need to violate this rule. - if (MI->isTerminator() && !TII->isPredicated(*MI)) { + if (MI->isTerminator()) { if (!FirstTerminator) FirstTerminator = MI; } else if (FirstTerminator) { diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -317,8 +317,8 @@ TBB = nullptr; FBB = nullptr; - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) + MachineBasicBlock::instr_iterator I = MBB.instr_end(); + if (I == MBB.instr_begin()) return false; // Empty blocks are easy. --I; @@ -332,7 +332,7 @@ // Skip over DEBUG values and predicated nonterminators. while (I->isDebugInstr() || !I->isTerminator()) { - if (I == MBB.begin()) + if (I == MBB.instr_begin()) return false; --I; } @@ -356,7 +356,7 @@ Cond.push_back(I->getOperand(2)); } else if (I->isReturn()) { // Returns can't be analyzed, but we should run cleanup. - CantAnalyze = !isPredicated(*I); + CantAnalyze = true; } else { // We encountered other unrecognized terminator. Bail out immediately. return true; @@ -377,7 +377,7 @@ // unconditional branch. if (AllowModify) { MachineBasicBlock::iterator DI = std::next(I); - while (DI != MBB.end()) { + while (DI != MBB.instr_end()) { MachineInstr &InstToDelete = *DI; ++DI; InstToDelete.eraseFromParent(); @@ -385,10 +385,19 @@ } } - if (CantAnalyze) + if (CantAnalyze) { + // We may not be able to analyze the block, but we could still have + // an unconditional branch as the last instruction in the block, which + // just branches to layout successor. If this is the case, then just + // remove it if we're allowed to make modifications. + if (AllowModify && !isPredicated(MBB.back()) && + isUncondBranchOpcode(MBB.back().getOpcode()) && + TBB && MBB.isLayoutSuccessor(TBB)) + removeBranch(MBB); return true; + } - if (I == MBB.begin()) + if (I == MBB.instr_begin()) return false; --I; diff --git a/llvm/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll b/llvm/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll --- a/llvm/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll +++ b/llvm/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll @@ -1,7 +1,6 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s ; RUN: llc < %s -mtriple=thumbv8 | FileCheck -check-prefix=CHECK-V8 %s ; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it | FileCheck -check-prefix=CHECK-V8 %s -; rdar://13782395 define i32 @t1(i32 %a, i32 %b, i8** %retaddr) { ; CHECK-LABEL: t1: @@ -101,7 +100,6 @@ ; B can be predicated with A.BrToBPredicate into A iff B.Predicate is less ; "permissive" than A.BrToBPredicate, i.e., iff A.BrToBPredicate subsumes ; B.Predicate. -; ; Hard-coded registers comes from the ABI. ; CHECK-LABEL: wrapDistance: @@ -109,6 +107,7 @@ ; CHECK-NEXT: itt le ; CHECK-NEXT: suble r0, r2, #1 ; CHECK-NEXT: bxle lr +; CHECK-NEXT: LBB{{.*}}: ; CHECK-NEXT: subs [[REG:r[0-9]+]], #120 ; CHECK-NEXT: cmp [[REG]], r1 ; CHECK-NOT: it lt diff --git a/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll b/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll --- a/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll +++ b/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll @@ -20,87 +20,88 @@ ; ENABLE-NEXT: .save {r11, lr} ; ENABLE-NEXT: push {r11, lr} ; ENABLE-NEXT: cmn r1, #1 -; ENABLE-NEXT: ble .LBB0_6 +; ENABLE-NEXT: ble .LBB0_7 ; ENABLE-NEXT: @ %bb.1: @ %while.cond.preheader ; ENABLE-NEXT: cmp r1, #0 -; ENABLE-NEXT: beq .LBB0_5 +; ENABLE-NEXT: beq .LBB0_6 ; ENABLE-NEXT: @ %bb.2: @ %while.cond.preheader ; ENABLE-NEXT: cmp r0, r2 ; ENABLE-NEXT: pophs {r11, pc} +; ENABLE-NEXT: .LBB0_3: @ %while.body.preheader ; ENABLE-NEXT: movw r12, :lower16:skip ; ENABLE-NEXT: sub r1, r1, #1 ; ENABLE-NEXT: movt r12, :upper16:skip -; ENABLE-NEXT: .LBB0_3: @ %while.body +; ENABLE-NEXT: .LBB0_4: @ %while.body ; ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: ldrb r3, [r0] ; ENABLE-NEXT: ldrb r3, [r12, r3] ; ENABLE-NEXT: add r0, r0, r3 ; ENABLE-NEXT: sub r3, r1, #1 ; ENABLE-NEXT: cmp r3, r1 -; ENABLE-NEXT: bhs .LBB0_5 -; ENABLE-NEXT: @ %bb.4: @ %while.body -; ENABLE-NEXT: @ in Loop: Header=BB0_3 Depth=1 +; ENABLE-NEXT: bhs .LBB0_6 +; ENABLE-NEXT: @ %bb.5: @ %while.body +; ENABLE-NEXT: @ in Loop: Header=BB0_4 Depth=1 ; ENABLE-NEXT: cmp r0, r2 ; ENABLE-NEXT: mov r1, r3 -; ENABLE-NEXT: blo .LBB0_3 -; ENABLE-NEXT: .LBB0_5: @ %if.end29 +; ENABLE-NEXT: blo .LBB0_4 +; ENABLE-NEXT: .LBB0_6: @ %if.end29 ; ENABLE-NEXT: pop {r11, pc} -; ENABLE-NEXT: .LBB0_6: @ %while.cond2.outer +; ENABLE-NEXT: .LBB0_7: @ %while.cond2.outer ; ENABLE-NEXT: @ =>This Loop Header: Depth=1 -; ENABLE-NEXT: @ Child Loop BB0_7 Depth 2 -; ENABLE-NEXT: @ Child Loop BB0_14 Depth 2 +; ENABLE-NEXT: @ Child Loop BB0_8 Depth 2 +; ENABLE-NEXT: @ Child Loop BB0_15 Depth 2 ; ENABLE-NEXT: mov r3, r0 -; ENABLE-NEXT: .LBB0_7: @ %while.cond2 -; ENABLE-NEXT: @ Parent Loop BB0_6 Depth=1 +; ENABLE-NEXT: .LBB0_8: @ %while.cond2 +; ENABLE-NEXT: @ Parent Loop BB0_7 Depth=1 ; ENABLE-NEXT: @ => This Inner Loop Header: Depth=2 ; ENABLE-NEXT: add r1, r1, #1 ; ENABLE-NEXT: cmp r1, #1 -; ENABLE-NEXT: beq .LBB0_17 -; ENABLE-NEXT: @ %bb.8: @ %while.body4 -; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=2 +; ENABLE-NEXT: beq .LBB0_18 +; ENABLE-NEXT: @ %bb.9: @ %while.body4 +; ENABLE-NEXT: @ in Loop: Header=BB0_8 Depth=2 ; ENABLE-NEXT: cmp r3, r2 -; ENABLE-NEXT: bls .LBB0_7 -; ENABLE-NEXT: @ %bb.9: @ %if.then7 -; ENABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; ENABLE-NEXT: bls .LBB0_8 +; ENABLE-NEXT: @ %bb.10: @ %if.then7 +; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 ; ENABLE-NEXT: mov r0, r3 ; ENABLE-NEXT: ldrb r12, [r0, #-1]! ; ENABLE-NEXT: sxtb lr, r12 ; ENABLE-NEXT: cmn lr, #1 -; ENABLE-NEXT: bgt .LBB0_6 -; ENABLE-NEXT: @ %bb.10: @ %if.then7 -; ENABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; ENABLE-NEXT: bgt .LBB0_7 +; ENABLE-NEXT: @ %bb.11: @ %if.then7 +; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 ; ENABLE-NEXT: cmp r0, r2 -; ENABLE-NEXT: bls .LBB0_6 -; ENABLE-NEXT: @ %bb.11: @ %land.rhs14.preheader -; ENABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 -; ENABLE-NEXT: cmn lr, #1 -; ENABLE-NEXT: bgt .LBB0_6 +; ENABLE-NEXT: bls .LBB0_7 ; ENABLE-NEXT: @ %bb.12: @ %land.rhs14.preheader -; ENABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 +; ENABLE-NEXT: cmn lr, #1 +; ENABLE-NEXT: bgt .LBB0_7 +; ENABLE-NEXT: @ %bb.13: @ %land.rhs14.preheader +; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 ; ENABLE-NEXT: cmp r12, #191 -; ENABLE-NEXT: bhi .LBB0_6 -; ENABLE-NEXT: @ %bb.13: @ %while.body24.preheader -; ENABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; ENABLE-NEXT: bhi .LBB0_7 +; ENABLE-NEXT: @ %bb.14: @ %while.body24.preheader +; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 ; ENABLE-NEXT: sub r3, r3, #2 -; ENABLE-NEXT: .LBB0_14: @ %while.body24 -; ENABLE-NEXT: @ Parent Loop BB0_6 Depth=1 +; ENABLE-NEXT: .LBB0_15: @ %while.body24 +; ENABLE-NEXT: @ Parent Loop BB0_7 Depth=1 ; ENABLE-NEXT: @ => This Inner Loop Header: Depth=2 ; ENABLE-NEXT: mov r0, r3 ; ENABLE-NEXT: cmp r3, r2 -; ENABLE-NEXT: bls .LBB0_6 -; ENABLE-NEXT: @ %bb.15: @ %while.body24.land.rhs14_crit_edge -; ENABLE-NEXT: @ in Loop: Header=BB0_14 Depth=2 +; ENABLE-NEXT: bls .LBB0_7 +; ENABLE-NEXT: @ %bb.16: @ %while.body24.land.rhs14_crit_edge +; ENABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2 ; ENABLE-NEXT: mov r3, r0 ; ENABLE-NEXT: ldrsb lr, [r3], #-1 ; ENABLE-NEXT: cmn lr, #1 ; ENABLE-NEXT: uxtb r12, lr -; ENABLE-NEXT: bgt .LBB0_6 -; ENABLE-NEXT: @ %bb.16: @ %while.body24.land.rhs14_crit_edge -; ENABLE-NEXT: @ in Loop: Header=BB0_14 Depth=2 +; ENABLE-NEXT: bgt .LBB0_7 +; ENABLE-NEXT: @ %bb.17: @ %while.body24.land.rhs14_crit_edge +; ENABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2 ; ENABLE-NEXT: cmp r12, #192 -; ENABLE-NEXT: blo .LBB0_14 -; ENABLE-NEXT: b .LBB0_6 -; ENABLE-NEXT: .LBB0_17: +; ENABLE-NEXT: blo .LBB0_15 +; ENABLE-NEXT: b .LBB0_7 +; ENABLE-NEXT: .LBB0_18: ; ENABLE-NEXT: mov r0, r3 ; ENABLE-NEXT: pop {r11, pc} ; @@ -109,87 +110,88 @@ ; DISABLE-NEXT: .save {r11, lr} ; DISABLE-NEXT: push {r11, lr} ; DISABLE-NEXT: cmn r1, #1 -; DISABLE-NEXT: ble .LBB0_6 +; DISABLE-NEXT: ble .LBB0_7 ; DISABLE-NEXT: @ %bb.1: @ %while.cond.preheader ; DISABLE-NEXT: cmp r1, #0 -; DISABLE-NEXT: beq .LBB0_5 +; DISABLE-NEXT: beq .LBB0_6 ; DISABLE-NEXT: @ %bb.2: @ %while.cond.preheader ; DISABLE-NEXT: cmp r0, r2 ; DISABLE-NEXT: pophs {r11, pc} +; DISABLE-NEXT: .LBB0_3: @ %while.body.preheader ; DISABLE-NEXT: movw r12, :lower16:skip ; DISABLE-NEXT: sub r1, r1, #1 ; DISABLE-NEXT: movt r12, :upper16:skip -; DISABLE-NEXT: .LBB0_3: @ %while.body +; DISABLE-NEXT: .LBB0_4: @ %while.body ; DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: ldrb r3, [r0] ; DISABLE-NEXT: ldrb r3, [r12, r3] ; DISABLE-NEXT: add r0, r0, r3 ; DISABLE-NEXT: sub r3, r1, #1 ; DISABLE-NEXT: cmp r3, r1 -; DISABLE-NEXT: bhs .LBB0_5 -; DISABLE-NEXT: @ %bb.4: @ %while.body -; DISABLE-NEXT: @ in Loop: Header=BB0_3 Depth=1 +; DISABLE-NEXT: bhs .LBB0_6 +; DISABLE-NEXT: @ %bb.5: @ %while.body +; DISABLE-NEXT: @ in Loop: Header=BB0_4 Depth=1 ; DISABLE-NEXT: cmp r0, r2 ; DISABLE-NEXT: mov r1, r3 -; DISABLE-NEXT: blo .LBB0_3 -; DISABLE-NEXT: .LBB0_5: @ %if.end29 +; DISABLE-NEXT: blo .LBB0_4 +; DISABLE-NEXT: .LBB0_6: @ %if.end29 ; DISABLE-NEXT: pop {r11, pc} -; DISABLE-NEXT: .LBB0_6: @ %while.cond2.outer +; DISABLE-NEXT: .LBB0_7: @ %while.cond2.outer ; DISABLE-NEXT: @ =>This Loop Header: Depth=1 -; DISABLE-NEXT: @ Child Loop BB0_7 Depth 2 -; DISABLE-NEXT: @ Child Loop BB0_14 Depth 2 +; DISABLE-NEXT: @ Child Loop BB0_8 Depth 2 +; DISABLE-NEXT: @ Child Loop BB0_15 Depth 2 ; DISABLE-NEXT: mov r3, r0 -; DISABLE-NEXT: .LBB0_7: @ %while.cond2 -; DISABLE-NEXT: @ Parent Loop BB0_6 Depth=1 +; DISABLE-NEXT: .LBB0_8: @ %while.cond2 +; DISABLE-NEXT: @ Parent Loop BB0_7 Depth=1 ; DISABLE-NEXT: @ => This Inner Loop Header: Depth=2 ; DISABLE-NEXT: add r1, r1, #1 ; DISABLE-NEXT: cmp r1, #1 -; DISABLE-NEXT: beq .LBB0_17 -; DISABLE-NEXT: @ %bb.8: @ %while.body4 -; DISABLE-NEXT: @ in Loop: Header=BB0_7 Depth=2 +; DISABLE-NEXT: beq .LBB0_18 +; DISABLE-NEXT: @ %bb.9: @ %while.body4 +; DISABLE-NEXT: @ in Loop: Header=BB0_8 Depth=2 ; DISABLE-NEXT: cmp r3, r2 -; DISABLE-NEXT: bls .LBB0_7 -; DISABLE-NEXT: @ %bb.9: @ %if.then7 -; DISABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; DISABLE-NEXT: bls .LBB0_8 +; DISABLE-NEXT: @ %bb.10: @ %if.then7 +; DISABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 ; DISABLE-NEXT: mov r0, r3 ; DISABLE-NEXT: ldrb r12, [r0, #-1]! ; DISABLE-NEXT: sxtb lr, r12 ; DISABLE-NEXT: cmn lr, #1 -; DISABLE-NEXT: bgt .LBB0_6 -; DISABLE-NEXT: @ %bb.10: @ %if.then7 -; DISABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; DISABLE-NEXT: bgt .LBB0_7 +; DISABLE-NEXT: @ %bb.11: @ %if.then7 +; DISABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 ; DISABLE-NEXT: cmp r0, r2 -; DISABLE-NEXT: bls .LBB0_6 -; DISABLE-NEXT: @ %bb.11: @ %land.rhs14.preheader -; DISABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 -; DISABLE-NEXT: cmn lr, #1 -; DISABLE-NEXT: bgt .LBB0_6 +; DISABLE-NEXT: bls .LBB0_7 ; DISABLE-NEXT: @ %bb.12: @ %land.rhs14.preheader -; DISABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; DISABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 +; DISABLE-NEXT: cmn lr, #1 +; DISABLE-NEXT: bgt .LBB0_7 +; DISABLE-NEXT: @ %bb.13: @ %land.rhs14.preheader +; DISABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 ; DISABLE-NEXT: cmp r12, #191 -; DISABLE-NEXT: bhi .LBB0_6 -; DISABLE-NEXT: @ %bb.13: @ %while.body24.preheader -; DISABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; DISABLE-NEXT: bhi .LBB0_7 +; DISABLE-NEXT: @ %bb.14: @ %while.body24.preheader +; DISABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1 ; DISABLE-NEXT: sub r3, r3, #2 -; DISABLE-NEXT: .LBB0_14: @ %while.body24 -; DISABLE-NEXT: @ Parent Loop BB0_6 Depth=1 +; DISABLE-NEXT: .LBB0_15: @ %while.body24 +; DISABLE-NEXT: @ Parent Loop BB0_7 Depth=1 ; DISABLE-NEXT: @ => This Inner Loop Header: Depth=2 ; DISABLE-NEXT: mov r0, r3 ; DISABLE-NEXT: cmp r3, r2 -; DISABLE-NEXT: bls .LBB0_6 -; DISABLE-NEXT: @ %bb.15: @ %while.body24.land.rhs14_crit_edge -; DISABLE-NEXT: @ in Loop: Header=BB0_14 Depth=2 +; DISABLE-NEXT: bls .LBB0_7 +; DISABLE-NEXT: @ %bb.16: @ %while.body24.land.rhs14_crit_edge +; DISABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2 ; DISABLE-NEXT: mov r3, r0 ; DISABLE-NEXT: ldrsb lr, [r3], #-1 ; DISABLE-NEXT: cmn lr, #1 ; DISABLE-NEXT: uxtb r12, lr -; DISABLE-NEXT: bgt .LBB0_6 -; DISABLE-NEXT: @ %bb.16: @ %while.body24.land.rhs14_crit_edge -; DISABLE-NEXT: @ in Loop: Header=BB0_14 Depth=2 +; DISABLE-NEXT: bgt .LBB0_7 +; DISABLE-NEXT: @ %bb.17: @ %while.body24.land.rhs14_crit_edge +; DISABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2 ; DISABLE-NEXT: cmp r12, #192 -; DISABLE-NEXT: blo .LBB0_14 -; DISABLE-NEXT: b .LBB0_6 -; DISABLE-NEXT: .LBB0_17: +; DISABLE-NEXT: blo .LBB0_15 +; DISABLE-NEXT: b .LBB0_7 +; DISABLE-NEXT: .LBB0_18: ; DISABLE-NEXT: mov r0, r3 ; DISABLE-NEXT: pop {r11, pc} entry: diff --git a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll --- a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-ARM ; RUN: llc < %s -mtriple=thumb-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-THUMB @@ -8,78 +9,91 @@ ; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-THUMBV7 define zeroext i1 @test_cmpxchg_res_i8(i8* %addr, i8 %desired, i8 zeroext %new) { +; CHECK-ARM-LABEL: test_cmpxchg_res_i8: +; CHECK-ARM: .save {r4, lr} +; CHECK-ARM-NEXT: push {r4, lr} +; CHECK-ARM-NEXT: mov r4, r1 +; CHECK-ARM-NEXT: bl __sync_val_compare_and_swap_1 +; CHECK-ARM-NEXT: and r1, r4, #255 +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: rsbs r1, r0, #0 +; CHECK-ARM-NEXT: adc r0, r0, r1 +; CHECK-ARM-NEXT: pop {r4, lr} +; CHECK-ARM-NEXT: mov pc, lr +; +; CHECK-THUMB-LABEL: test_cmpxchg_res_i8: +; CHECK-THUMB: .save {r4, lr} +; CHECK-THUMB-NEXT: push {r4, lr} +; CHECK-THUMB-NEXT: movs r4, r1 +; CHECK-THUMB-NEXT: bl __sync_val_compare_and_swap_1 +; CHECK-THUMB-NEXT: movs r1, #255 +; CHECK-THUMB-NEXT: ands r1, r4 +; CHECK-THUMB-NEXT: subs r1, r0, r1 +; CHECK-THUMB-NEXT: rsbs r0, r1, #0 +; CHECK-THUMB-NEXT: adcs r0, r1 +; CHECK-THUMB-NEXT: pop {r4} +; CHECK-THUMB-NEXT: pop {r1} +; CHECK-THUMB-NEXT: bx r1 +; +; CHECK-ARMV6-LABEL: test_cmpxchg_res_i8: +; CHECK-ARMV6: uxtb r1, r1 +; CHECK-ARMV6-NEXT: .LBB0_1: +; CHECK-ARMV6-NEXT: ldrexb r3, [r0] +; CHECK-ARMV6-NEXT: cmp r3, r1 +; CHECK-ARMV6-NEXT: movne r0, #0 +; CHECK-ARMV6-NEXT: bxne lr +; CHECK-ARMV6-NEXT: .LBB0_2: +; CHECK-ARMV6-NEXT: strexb r3, r2, [r0] +; CHECK-ARMV6-NEXT: cmp r3, #0 +; CHECK-ARMV6-NEXT: moveq r0, #1 +; CHECK-ARMV6-NEXT: bxeq lr +; CHECK-ARMV6-NEXT: b .LBB0_1 +; +; CHECK-THUMBV6-LABEL: test_cmpxchg_res_i8: +; CHECK-THUMBV6: .save {r4, lr} +; CHECK-THUMBV6-NEXT: push {r4, lr} +; CHECK-THUMBV6-NEXT: mov r4, r1 +; CHECK-THUMBV6-NEXT: bl __sync_val_compare_and_swap_1 +; CHECK-THUMBV6-NEXT: uxtb r1, r4 +; CHECK-THUMBV6-NEXT: subs r1, r0, r1 +; CHECK-THUMBV6-NEXT: rsbs r0, r1, #0 +; CHECK-THUMBV6-NEXT: adcs r0, r1 +; CHECK-THUMBV6-NEXT: pop {r4, pc} +; +; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8: +; CHECK-ARMV7: uxtb r1, r1 +; CHECK-ARMV7-NEXT: .LBB0_1: +; CHECK-ARMV7-NEXT: ldrexb r3, [r0] +; CHECK-ARMV7-NEXT: cmp r3, r1 +; CHECK-ARMV7-NEXT: bne .LBB0_3 +; CHECK-ARMV7-NEXT: strexb r3, r2, [r0] +; CHECK-ARMV7-NEXT: cmp r3, #0 +; CHECK-ARMV7-NEXT: moveq r0, #1 +; CHECK-ARMV7-NEXT: bxeq lr +; CHECK-ARMV7-NEXT: b .LBB0_1 +; CHECK-ARMV7-NEXT: .LBB0_3: +; CHECK-ARMV7-NEXT: mov r0, #0 +; CHECK-ARMV7-NEXT: clrex +; CHECK-ARMV7-NEXT: bx lr +; +; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8: +; CHECK-THUMBV7: uxtb r1, r1 +; CHECK-THUMBV7-NEXT: .LBB0_1: +; CHECK-THUMBV7-NEXT: ldrexb r3, [r0] +; CHECK-THUMBV7-NEXT: cmp r3, r1 +; CHECK-THUMBV7-NEXT: bne .LBB0_3 +; CHECK-THUMBV7-NEXT: strexb r3, r2, [r0] +; CHECK-THUMBV7-NEXT: cmp r3, #0 +; CHECK-THUMBV7-NEXT: itt eq +; CHECK-THUMBV7-NEXT: moveq r0, #1 +; CHECK-THUMBV7-NEXT: bxeq lr +; CHECK-THUMBV7-NEXT: b .LBB0_1 +; CHECK-THUMBV7-NEXT: .LBB0_3: +; CHECK-THUMBV7-NEXT: movs r0, #0 +; CHECK-THUMBV7-NEXT: clrex +; CHECK-THUMBV7-NEXT: bx lr entry: %0 = cmpxchg i8* %addr, i8 %desired, i8 %new monotonic monotonic %1 = extractvalue { i8, i1 } %0, 1 ret i1 %1 } - -; CHECK-ARM-LABEL: test_cmpxchg_res_i8 -; CHECK-ARM: bl __sync_val_compare_and_swap_1 -; CHECK-ARM: sub r0, r0, {{r[0-9]+}} -; CHECK-ARM: rsbs [[REG:r[0-9]+]], r0, #0 -; CHECK-ARM: adc r0, r0, [[REG]] - -; CHECK-THUMB-LABEL: test_cmpxchg_res_i8 -; CHECK-THUMB: bl __sync_val_compare_and_swap_1 -; CHECK-THUMB-NOT: mov [[R1:r[0-7]]], r0 -; CHECK-THUMB: subs [[R1:r[0-7]]], r0, {{r[0-9]+}} -; CHECK-THUMB: rsbs r0, [[R1]], #0 -; CHECK-THUMB: adcs r0, [[R1]] - -; CHECK-ARMV6-LABEL: test_cmpxchg_res_i8: -; CHECK-ARMV6-NEXT: .fnstart -; CHECK-ARMV6-NEXT: uxtb [[DESIRED:r[0-9]+]], r1 -; CHECK-ARMV6-NEXT: [[TRY:.LBB[0-9_]+]]: -; CHECK-ARMV6-NEXT: ldrexb [[LD:r[0-9]+]], [r0] -; CHECK-ARMV6-NEXT: cmp [[LD]], [[DESIRED]] -; CHECK-ARMV6-NEXT: movne [[RES:r[0-9]+]], #0 -; CHECK-ARMV6-NEXT: bxne lr -; CHECK-ARMV6-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0] -; CHECK-ARMV6-NEXT: cmp [[SUCCESS]], #0 -; CHECK-ARMV6-NEXT: moveq [[RES]], #1 -; CHECK-ARMV6-NEXT: bxeq lr -; CHECK-ARMV6-NEXT: b [[TRY]] - -; CHECK-THUMBV6-LABEL: test_cmpxchg_res_i8: -; CHECK-THUMBV6: mov [[EXPECTED:r[0-9]+]], r1 -; CHECK-THUMBV6-NEXT: bl __sync_val_compare_and_swap_1 -; CHECK-THUMBV6-NEXT: uxtb r1, r4 -; CHECK-THUMBV6-NEXT: subs [[R1:r[0-7]]], r0, {{r[0-9]+}} -; CHECK-THUMBV6-NEXT: rsbs r0, [[R1]], #0 -; CHECK-THUMBV6-NEXT: adcs r0, [[R1]] - -; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8: -; CHECK-ARMV7-NEXT: .fnstart -; CHECK-ARMV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1 -; CHECK-ARMV7-NEXT: [[TRY:.LBB[0-9_]+]]: -; CHECK-ARMV7-NEXT: ldrexb [[SUCCESS:r[0-9]+]], [r0] -; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], r1 -; CHECK-ARMV7-NEXT: bne [[EXIT:.LBB[0-9_]+]] -; CHECK-ARMV7-NEXT: strexb [[SUCCESS]], r2, [r0] -; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0 -; CHECK-ARMV7-NEXT: moveq r0, #1 -; CHECK-ARMV7-NEXT: bxeq lr -; CHECK-ARMV7-NEXT: b [[TRY]] -; CHECK-ARMV7-NEXT: [[EXIT]]: -; CHECK-ARMV7-NEXT: mov r0, #0 -; CHECK-ARMV7-NEXT: clrex -; CHECK-ARMV7-NEXT: bx lr - -; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8: -; CHECK-THUMBV7-NEXT: .fnstart -; CHECK-THUMBV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1 -; CHECK-THUMBV7-NEXT: [[TRYLD:.LBB[0-9_]+]] -; CHECK-THUMBV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0] -; CHECK-THUMBV7-NEXT: cmp [[LD]], [[DESIRED]] -; CHECK-THUMBV7-NEXT: bne [[EXIT:.LBB[0-9_]+]] -; CHECK-THUMBV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0] -; CHECK-THUMBV7-NEXT: cmp [[SUCCESS]], #0 -; CHECK-THUMBV7-NEXT: itt eq -; CHECK-THUMBV7-NEXT: moveq r0, #1 -; CHECK-THUMBV7-NEXT: bxeq lr -; CHECK-THUMBV7-NEXT: b [[TRYLD]] -; CHECK-THUMBV7-NEXT: [[EXIT]]: -; CHECK-THUMBV7-NEXT: movs r0, #0 -; CHECK-THUMBV7-NEXT: clrex -; CHECK-THUMBV7-NEXT: bx lr diff --git a/llvm/test/CodeGen/ARM/call-tc.ll b/llvm/test/CodeGen/ARM/call-tc.ll --- a/llvm/test/CodeGen/ARM/call-tc.ll +++ b/llvm/test/CodeGen/ARM/call-tc.ll @@ -85,6 +85,7 @@ ; CHECKT2D-LABEL: t7: ; CHECKT2D: it ne ; CHECKT2D-NEXT: bne.w _foo +; CHECKT2D-NEXT: LBB{{.*}}: ; CHECKT2D-NEXT: push ; CHECKT2D-NEXT: mov r7, sp ; CHECKT2D-NEXT: bl _foo diff --git a/llvm/test/CodeGen/ARM/cmp-bool.ll b/llvm/test/CodeGen/ARM/cmp-bool.ll --- a/llvm/test/CodeGen/ARM/cmp-bool.ll +++ b/llvm/test/CodeGen/ARM/cmp-bool.ll @@ -8,6 +8,7 @@ ; ARM: @ %bb.0: @ %entry ; ARM-NEXT: cmp r0, r1 ; ARM-NEXT: bxne lr +; ARM-NEXT: .LBB0_1: @ %if.then ; ARM-NEXT: bx r2 ; ; THUMB-LABEL: bool_eq: @@ -25,6 +26,7 @@ ; THUMB2-NEXT: cmp r0, r1 ; THUMB2-NEXT: it ne ; THUMB2-NEXT: bxne lr +; THUMB2-NEXT: .LBB0_1: @ %if.then ; THUMB2-NEXT: bx r2 entry: %0 = xor i1 %a, %b @@ -43,6 +45,7 @@ ; ARM: @ %bb.0: @ %entry ; ARM-NEXT: cmp r0, r1 ; ARM-NEXT: bxeq lr +; ARM-NEXT: .LBB1_1: @ %if.then ; ARM-NEXT: bx r2 ; ; THUMB-LABEL: bool_ne: @@ -60,6 +63,7 @@ ; THUMB2-NEXT: cmp r0, r1 ; THUMB2-NEXT: it eq ; THUMB2-NEXT: bxeq lr +; THUMB2-NEXT: .LBB1_1: @ %if.then ; THUMB2-NEXT: bx r2 entry: %cmp = xor i1 %a, %b diff --git a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll --- a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll +++ b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll @@ -2,9 +2,6 @@ define void @test_cmpxchg_weak(i32 *%addr, i32 %desired, i32 %new) { ; CHECK-LABEL: test_cmpxchg_weak: - - %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic - %oldval = extractvalue { i32, i1 } %pair, 0 ; CHECK-NEXT: %bb.0: ; CHECK-NEXT: ldrex [[LOADED:r[0-9]+]], [r0] ; CHECK-NEXT: cmp [[LOADED]], r1 @@ -25,18 +22,15 @@ ; CHECK-NEXT: dmb ish ; CHECK-NEXT: str r3, [r0] ; CHECK-NEXT: bx lr - +; + %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic + %oldval = extractvalue { i32, i1 } %pair, 0 store i32 %oldval, i32* %addr ret void } - define i1 @test_cmpxchg_weak_to_bool(i32, i32 *%addr, i32 %desired, i32 %new) { ; CHECK-LABEL: test_cmpxchg_weak_to_bool: - - %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic - %success = extractvalue { i32, i1 } %pair, 1 - ; CHECK-NEXT: %bb.0: ; CHECK-NEXT: ldrex [[LOADED:r[0-9]+]], [r1] ; CHECK-NEXT: cmp [[LOADED]], r2 @@ -47,6 +41,7 @@ ; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r3, [r1] ; CHECK-NEXT: cmp [[SUCCESS]], #0 ; CHECK-NEXT: bxne lr +; CHECK-NEXT: LBB1_2: ; CHECK-NEXT: mov r0, #1 ; CHECK-NEXT: dmb ish ; CHECK-NEXT: bx lr @@ -54,6 +49,8 @@ ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: clrex ; CHECK-NEXT: bx lr - +; + %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic + %success = extractvalue { i32, i1 } %pair, 1 ret i1 %success } diff --git a/llvm/test/CodeGen/ARM/code-placement.ll b/llvm/test/CodeGen/ARM/code-placement.ll --- a/llvm/test/CodeGen/ARM/code-placement.ll +++ b/llvm/test/CodeGen/ARM/code-placement.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s ; PHI elimination shouldn't break backedge. -; rdar://8263994 %struct.list_data_s = type { i16, i16 } %struct.list_head = type { %struct.list_head*, %struct.list_data_s* } @@ -12,6 +11,7 @@ br i1 %0, label %bb2, label %bb bb: +; CHECK: LBB0_1: ; CHECK: LBB0_[[LABEL:[0-9]]]: ; CHECK: bne LBB0_[[LABEL]] ; CHECK-NOT: b LBB0_[[LABEL]] @@ -30,7 +30,6 @@ } ; Optimize loop entry, eliminate intra loop branches -; rdar://8117827 define i32 @t2(i32 %passes, i32* nocapture %src, i32 %size) nounwind readonly { entry: ; CHECK-LABEL: t2: diff --git a/llvm/test/CodeGen/ARM/codesize-ifcvt.mir b/llvm/test/CodeGen/ARM/codesize-ifcvt.mir --- a/llvm/test/CodeGen/ARM/codesize-ifcvt.mir +++ b/llvm/test/CodeGen/ARM/codesize-ifcvt.mir @@ -158,31 +158,37 @@ body: | ; CHECK-V7-LABEL: name: test_nosize ; CHECK-V7: bb.0 (%ir-block.0): - ; CHECK-V7: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-V7: successors: %bb.1(0x80000000) ; CHECK-V7: liveins: $lr, $r7 ; CHECK-V7: renamable $r0 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg ; CHECK-V7: t2CMPri killed renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-V7: tTAILJMPdND @extfunc, 1 /* CC::ne */, killed $cpsr, implicit $sp, implicit $sp + ; CHECK-V7: bb.1.b1: + ; CHECK-V7: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK-V7: liveins: $r7, $lr ; CHECK-V7: $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr ; CHECK-V7: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK-V7: frame-setup CFI_INSTRUCTION offset $lr, -4 ; CHECK-V7: frame-setup CFI_INSTRUCTION offset $r7, -8 ; CHECK-V7: renamable $r0 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg ; CHECK-V7: t2CMPri killed renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK-V7: t2Bcc %bb.2, 1 /* CC::ne */, killed $cpsr - ; CHECK-V7: bb.1.b2: - ; CHECK-V7: successors: %bb.3(0x80000000) + ; CHECK-V7: t2Bcc %bb.3, 1 /* CC::ne */, killed $cpsr + ; CHECK-V7: bb.2.b2: + ; CHECK-V7: successors: %bb.4(0x80000000) ; CHECK-V7: tBL 14 /* CC::al */, $noreg, @extfunc, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0 - ; CHECK-V7: t2B %bb.3, 14 /* CC::al */, $noreg - ; CHECK-V7: bb.2.b3: - ; CHECK-V7: successors: %bb.3(0x80000000) + ; CHECK-V7: t2B %bb.4, 14 /* CC::al */, $noreg + ; CHECK-V7: bb.3.b3: + ; CHECK-V7: successors: %bb.4(0x80000000) ; CHECK-V7: renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from `i32* undef`) ; CHECK-V7: renamable $r0 = t2ANDri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg - ; CHECK-V7: bb.3.b5: + ; CHECK-V7: bb.4.b5: + ; CHECK-V7: successors: %bb.5(0x50000000) ; CHECK-V7: liveins: $r0 ; CHECK-V7: t2CMPri killed renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-V7: $sp = t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr ; CHECK-V7: tBX_RET 0 /* CC::eq */, killed $cpsr + ; CHECK-V7: bb.5.b7: + ; CHECK-V7: liveins: $lr, $r7 ; CHECK-V7: tTAILJMPdND @extfunc, 14 /* CC::al */, $noreg, implicit $sp, implicit $sp ; CHECK-V8-LABEL: name: test_nosize ; CHECK-V8: bb.0 (%ir-block.0): diff --git a/llvm/test/CodeGen/ARM/constant-islands-split-IT.mir b/llvm/test/CodeGen/ARM/constant-islands-split-IT.mir --- a/llvm/test/CodeGen/ARM/constant-islands-split-IT.mir +++ b/llvm/test/CodeGen/ARM/constant-islands-split-IT.mir @@ -99,12 +99,10 @@ ; CHECK: successors: ; CHECK: CONSTPOOL_ENTRY 7, %const.1, 8 ; CHECK: bb.7 (align 2): - ; CHECK: successors: ; CHECK: liveins: $r0, $cpsr, $d0, $s0, $s1, $d1, $s2, $s3, $d2, $s4, $s5 ; CHECK: t2IT 0, 4, implicit-def $itstate ; CHECK: $sp = tMOVr $r0, 0 /* CC::eq */, $cpsr, implicit $itstate ; CHECK: $sp = t2LDMIA_RET $sp, 0 /* CC::eq */, killed $cpsr, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc, implicit killed $d0, implicit killed $d1, implicit killed $d2, implicit $sp, implicit killed $itstate - ; CHECK: tBL 14 /* CC::al */, $noreg, &__stack_chk_fail, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp ; CHECK: bb.8 (align 8): ; CHECK: successors: ; CHECK: CONSTPOOL_ENTRY 6, %const.0, 8 @@ -138,7 +136,6 @@ t2IT 0, 4, implicit-def $itstate $sp = tMOVr $r0, 0, $cpsr, implicit $itstate $sp = t2LDMIA_RET $sp, 0, killed $cpsr, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc, implicit killed $d0, implicit killed $d1, implicit killed $d2, implicit $sp, implicit killed $itstate - tBL 14, $noreg, &__stack_chk_fail, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp bb.3: successors: %bb.3(0x80000000) diff --git a/llvm/test/CodeGen/ARM/csr-split.ll b/llvm/test/CodeGen/ARM/csr-split.ll --- a/llvm/test/CodeGen/ARM/csr-split.ll +++ b/llvm/test/CodeGen/ARM/csr-split.ll @@ -15,12 +15,13 @@ ; CHECK-NEXT: cmp r0, r4 ; CHECK-NEXT: popne {r4, lr} ; CHECK-NEXT: movne pc, lr +; CHECK-NEXT: .LBB0_1: @ %if.then ; CHECK-NEXT: bl callVoid ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: pop {r4, lr} ; CHECK-NEXT: b callNonVoid ; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: @ %bb.2: ; CHECK-NEXT: .LCPI0_0: ; CHECK-NEXT: .long a entry: diff --git a/llvm/test/CodeGen/ARM/machine-sink-multidef.ll b/llvm/test/CodeGen/ARM/machine-sink-multidef.ll --- a/llvm/test/CodeGen/ARM/machine-sink-multidef.ll +++ b/llvm/test/CodeGen/ARM/machine-sink-multidef.ll @@ -21,10 +21,11 @@ ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: popne {r11, lr} ; CHECK-NEXT: movne pc, lr +; CHECK-NEXT: .LBB0_1: @ %if.then5 ; CHECK-NEXT: ldr r1, [r1, #4] ; CHECK-NEXT: bl k ; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: @ %bb.2: ; CHECK-NEXT: .LCPI0_0: ; CHECK-NEXT: .long f ; CHECK-NEXT: .LCPI0_1: diff --git a/llvm/test/CodeGen/ARM/peephole-bitcast.ll b/llvm/test/CodeGen/ARM/peephole-bitcast.ll --- a/llvm/test/CodeGen/ARM/peephole-bitcast.ll +++ b/llvm/test/CodeGen/ARM/peephole-bitcast.ll @@ -13,6 +13,7 @@ ; CHECK-NEXT: movt r1, #32639 ; CHECK-NEXT: cmp r0, r1 ; CHECK-NEXT: bxhi lr +; CHECK-NEXT: .LBB0_1: @ %if.then ; CHECK-NEXT: b doSomething entry: %0 = bitcast float %x to i32 diff --git a/llvm/test/CodeGen/ARM/reg_sequence.ll b/llvm/test/CodeGen/ARM/reg_sequence.ll --- a/llvm/test/CodeGen/ARM/reg_sequence.ll +++ b/llvm/test/CodeGen/ARM/reg_sequence.ll @@ -285,6 +285,7 @@ ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: movne r0, #0 ; CHECK-NEXT: bxne lr +; CHECK-NEXT: LBB9_1: ; CHECK-NEXT: trap entry: %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/sched-it-debug-nodes.mir b/llvm/test/CodeGen/ARM/sched-it-debug-nodes.mir deleted file mode 100644 --- a/llvm/test/CodeGen/ARM/sched-it-debug-nodes.mir +++ /dev/null @@ -1,157 +0,0 @@ -# RUN: llc -mtriple thumbv7 -verify-machineinstrs -start-after if-converter -print-before post-RA-sched -print-after post-RA-sched %s -o /dev/null 2>&1 | FileCheck %s ---- | - ; ModuleID = '/Volumes/Data/llvm/test/CodeGen/ARM/sched-it-debug-nodes.ll' - target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" - target triple = "thumbv7" - - %struct.s = type opaque - - ; Function Attrs: nounwind - define arm_aapcscc i32 @f(%struct.s* %s, i32 %u, i8* %b, i32 %n) #0 !dbg !4 { - entry: - tail call void @llvm.dbg.value(metadata %struct.s* %s, i64 0, metadata !18, metadata !27), !dbg !28 - tail call void @llvm.dbg.value(metadata i32 %u, i64 0, metadata !19, metadata !27), !dbg !28 - tail call void @llvm.dbg.value(metadata i8* %b, i64 0, metadata !20, metadata !27), !dbg !28 - tail call void @llvm.dbg.value(metadata i32 %n, i64 0, metadata !21, metadata !27), !dbg !28 - %cmp = icmp ult i32 %n, 4, !dbg !29 - br i1 %cmp, label %return, label %if.end, !dbg !31 - - if.end: ; preds = %entry - tail call arm_aapcscc void @g(%struct.s* %s, i8* %b, i32 %n) #3, !dbg !32 - br label %return, !dbg !33 - - return: ; preds = %if.end, %entry - %retval.0 = phi i32 [ 0, %if.end ], [ -1, %entry ] - ret i32 %retval.0, !dbg !34 - } - - ; NOTE: This is checking that the register in the DEBUG_VALUE node is not - ; accidentally being marked as KILL. The DBG_VALUE node gets introduced in - ; If-Conversion, and gets bundled into the IT block. The Post RA Scheduler - ; attempts to schedule the Machine Instr, and tries to tag the register in the - ; debug value as KILL'ed, resulting in a DEBUG_VALUE node changing codegen! (or - ; hopefully, triggering an assert). - - ; CHECK: BUNDLE implicit-def dead $itstate{{.*}} { - ; CHECK: DBG_VALUE $r1, $noreg, !"u" - ; CHECK-NOT: DBG_VALUE killed $r1, $noreg, !"u" - - declare arm_aapcscc void @g(%struct.s*, i8*, i32) #1 - - ; Function Attrs: nounwind readnone - declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2 - - attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - attributes #2 = { nounwind readnone } - attributes #3 = { nounwind } - - !llvm.dbg.cu = !{!0} - !llvm.module.flags = !{!22, !23, !24, !25} - !llvm.ident = !{!26} - - !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (llvm/trunk 237059)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2) - !1 = !DIFile(filename: "", directory: "/Users/compnerd/Source/llvm") - !2 = !{} - !4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 9, type: !5, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !17) - !5 = !DISubroutineType(types: !6) - !6 = !{!7, !8, !11, !12, !16} - !7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) - !8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 32, align: 32) - !9 = !DIDerivedType(tag: DW_TAG_typedef, name: "s", file: !1, line: 5, baseType: !10) - !10 = !DICompositeType(tag: DW_TAG_structure_type, name: "s", file: !1, line: 5, flags: DIFlagFwdDecl) - !11 = !DIBasicType(name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned) - !12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !13, size: 32, align: 32) - !13 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !14) - !14 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint8_t", file: !1, line: 2, baseType: !15) - !15 = !DIBasicType(name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char) - !16 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", file: !1, line: 3, baseType: !11) - !17 = !{!18, !19, !20, !21} - !18 = !DILocalVariable(name: "s", arg: 1, scope: !4, file: !1, line: 9, type: !8) - !19 = !DILocalVariable(name: "u", arg: 2, scope: !4, file: !1, line: 9, type: !11) - !20 = !DILocalVariable(name: "b", arg: 3, scope: !4, file: !1, line: 9, type: !12) - !21 = !DILocalVariable(name: "n", arg: 4, scope: !4, file: !1, line: 9, type: !16) - !22 = !{i32 2, !"Dwarf Version", i32 4} - !23 = !{i32 2, !"Debug Info Version", i32 3} - !24 = !{i32 1, !"wchar_size", i32 4} - !25 = !{i32 1, !"min_enum_size", i32 4} - !26 = !{!"clang version 3.7.0 (llvm/trunk 237059)"} - !27 = !DIExpression() - !28 = !DILocation(line: 9, scope: !4) - !29 = !DILocation(line: 10, scope: !30) - !30 = distinct !DILexicalBlock(scope: !4, file: !1, line: 10) - !31 = !DILocation(line: 10, scope: !4) - !32 = !DILocation(line: 13, scope: !4) - !33 = !DILocation(line: 14, scope: !4) - !34 = !DILocation(line: 15, scope: !4) - -... ---- -name: f -alignment: 2 -exposesReturnsTwice: false -tracksRegLiveness: true -liveins: - - { reg: '$r0' } - - { reg: '$r1' } - - { reg: '$r2' } - - { reg: '$r3' } -calleeSavedRegisters: [ '$lr', '$d8', '$d9', '$d10', '$d11', '$d12', '$d13', - '$d14', '$d15', '$q4', '$q5', '$q6', '$q7', '$r4', - '$r5', '$r6', '$r7', '$r8', '$r9', '$r10', '$r11', - '$s16', '$s17', '$s18', '$s19', '$s20', '$s21', - '$s22', '$s23', '$s24', '$s25', '$s26', '$s27', - '$s28', '$s29', '$s30', '$s31', '$d8_d10', '$d9_d11', - '$d10_d12', '$d11_d13', '$d12_d14', '$d13_d15', - '$q4_q5', '$q5_q6', '$q6_q7', '$q4_q5_q6_q7', '$r4_r5', - '$r6_r7', '$r8_r9', '$r10_r11', '$d8_d9_d10', '$d9_d10_d11', - '$d10_d11_d12', '$d11_d12_d13', '$d12_d13_d14', - '$d13_d14_d15', '$d8_d10_d12', '$d9_d11_d13', '$d10_d12_d14', - '$d11_d13_d15', '$d8_d10_d12_d14', '$d9_d11_d13_d15', - '$d9_d10', '$d11_d12', '$d13_d14', '$d9_d10_d11_d12', - '$d11_d12_d13_d14' ] -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 8 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: true - hasCalls: true - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -stack: - - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, callee-saved-register: '$lr', callee-saved-restored: false } - - { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 4, callee-saved-register: '$r7' } -body: | - bb.0.entry: - liveins: $r0, $r1, $r2, $r3, $lr, $r7 - - DBG_VALUE $r0, $noreg, !18, !27, debug-location !28 - DBG_VALUE $r1, $noreg, !19, !27, debug-location !28 - DBG_VALUE $r2, $noreg, !20, !27, debug-location !28 - DBG_VALUE $r3, $noreg, !21, !27, debug-location !28 - t2CMPri $r3, 4, 14, $noreg, implicit-def $cpsr, debug-location !31 - DBG_VALUE $r1, $noreg, !19, !27, debug-location !28 - $r0 = t2MOVi -1, 3, $cpsr, $noreg, implicit undef $r0 - DBG_VALUE $r1, $noreg, !19, !27, debug-location !28 - tBX_RET 3, $cpsr, implicit $r0, debug-location !34 - $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r7, killed $lr - frame-setup CFI_INSTRUCTION def_cfa_offset 8 - frame-setup CFI_INSTRUCTION offset $lr, -4 - frame-setup CFI_INSTRUCTION offset $r7, -8 - DBG_VALUE $r0, $noreg, !18, !27, debug-location !28 - DBG_VALUE $r1, $noreg, !19, !27, debug-location !28 - DBG_VALUE $r2, $noreg, !20, !27, debug-location !28 - DBG_VALUE $r3, $noreg, !21, !27, debug-location !28 - $r1 = tMOVr killed $r2, 14, $noreg, debug-location !32 - $r2 = tMOVr killed $r3, 14, $noreg, debug-location !32 - tBL 14, $noreg, @g, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit-def $sp, debug-location !32 - $r0 = t2MOVi 0, 14, $noreg, $noreg - $sp = t2LDMIA_RET $sp, 14, $noreg, def $r7, def $pc, implicit $r0 - -... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s ; CHECK-LABEL: mul_v16i8 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll @@ -23,13 +23,13 @@ entry: call void @llvm.set.loop.iterations.i32(i32 %N) br label %for.body.preheader - + for.body.preheader: %scevgep = getelementptr i32, i32* %a, i32 -1 %scevgep4 = getelementptr i32, i32* %c, i32 -1 %scevgep8 = getelementptr i32, i32* %b, i32 -1 br label %for.header - + for.body: %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1 %ld1 = load i32, i32* %scevgep11, align 4 @@ -66,13 +66,13 @@ entry: call void @llvm.set.loop.iterations.i32(i32 %N) br label %for.body.preheader - + for.body.preheader: %scevgep = getelementptr i32, i32* %a, i32 -1 %scevgep4 = getelementptr i32, i32* %c, i32 -1 %scevgep8 = getelementptr i32, i32* %b, i32 -1 br label %for.header - + for.body: %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1 %ld1 = load i32, i32* %scevgep11, align 4 @@ -109,13 +109,13 @@ entry: call void @llvm.set.loop.iterations.i32(i32 %N) br label %for.body.preheader - + for.body.preheader: %scevgep = getelementptr i32, i32* %a, i32 -1 %scevgep4 = getelementptr i32, i32* %c, i32 -1 %scevgep8 = getelementptr i32, i32* %b, i32 -1 br label %for.header - + for.body: %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1 %ld1 = load i32, i32* %scevgep11, align 4 @@ -152,13 +152,13 @@ entry: call void @llvm.set.loop.iterations.i32(i32 %N) br label %for.body.preheader - + for.body.preheader: %scevgep = getelementptr i32, i32* %a, i32 -1 %scevgep4 = getelementptr i32, i32* %c, i32 -1 %scevgep8 = getelementptr i32, i32* %b, i32 -1 br label %for.header - + for.body: %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1 %ld1 = load i32, i32* %scevgep11, align 4 @@ -196,13 +196,13 @@ entry: call void @llvm.set.loop.iterations.i32(i32 %N) br label %for.body.preheader - + for.body.preheader: %scevgep = getelementptr i32, i32* %a, i32 -1 %scevgep4 = getelementptr i32, i32* %c, i32 -1 %scevgep8 = getelementptr i32, i32* %b, i32 -1 br label %for.header - + for.body: %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1 %ld1 = load i32, i32* %scevgep11, align 4 @@ -239,13 +239,13 @@ entry: call void @llvm.set.loop.iterations.i32(i32 %N) br label %for.body.preheader - + for.body.preheader: %scevgep = getelementptr i32, i32* %a, i32 -1 %scevgep4 = getelementptr i32, i32* %c, i32 -1 %scevgep8 = getelementptr i32, i32* %b, i32 -1 br label %for.header - + for.body: %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1 %ld1 = load i32, i32* %scevgep11, align 4 @@ -282,13 +282,13 @@ entry: call void @llvm.set.loop.iterations.i32(i32 %N) br label %for.body.preheader - + for.body.preheader: %scevgep = getelementptr i32, i32* %a, i32 -1 %scevgep4 = getelementptr i32, i32* %c, i32 -1 %scevgep8 = getelementptr i32, i32* %b, i32 -1 br label %for.header - + for.body: %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1 %ld1 = load i32, i32* %scevgep11, align 4 @@ -326,13 +326,13 @@ entry: call void @llvm.set.loop.iterations.i32(i32 %N) br label %for.body.preheader - + for.body.preheader: %scevgep = getelementptr i32, i32* %a, i32 -1 %scevgep4 = getelementptr i32, i32* %c, i32 -1 %scevgep8 = getelementptr i32, i32* %b, i32 -1 br label %for.header - + for.body: %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1 %ld1 = load i32, i32* %scevgep11, align 4 @@ -369,13 +369,13 @@ entry: call void @llvm.set.loop.iterations.i32(i32 %N) br label %for.body.preheader - + for.body.preheader: %scevgep = getelementptr i32, i32* %a, i32 -1 %scevgep4 = getelementptr i32, i32* %c, i32 -1 %scevgep8 = getelementptr i32, i32* %b, i32 -1 br label %for.header - + for.body: %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1 %ld1 = load i32, i32* %scevgep11, align 4 @@ -416,10 +416,10 @@ %wls = call i1 @llvm.test.set.loop.iterations.i32(i32 %N) %xor = xor i1 %wls, 1 br i1 %xor, label %while.end, label %while.body.preheader - + while.body.preheader: br label %while.body - + while.body: %a.addr.06 = phi i16* [ %incdec.ptr1, %while.body ], [ %a, %while.body.preheader ] %b.addr.05 = phi i16* [ %incdec.ptr, %while.body ], [ %b, %while.body.preheader ] @@ -431,7 +431,7 @@ %count.next = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %count, i32 1) %cmp = icmp ne i32 %count.next, 0 br i1 %cmp, label %while.body, label %while.end - + while.end: ret void } @@ -449,10 +449,10 @@ %wls = call i1 @llvm.test.set.loop.iterations.i32(i32 %N) %cmp = icmp ne i1 %wls, 1 br i1 %cmp, label %while.end, label %while.body.preheader - + while.body.preheader: br label %while.body - + while.body: %a.addr.06 = phi i16* [ %incdec.ptr1, %while.body ], [ %a, %while.body.preheader ] %b.addr.05 = phi i16* [ %incdec.ptr, %while.body ], [ %b, %while.body.preheader ] @@ -464,7 +464,7 @@ %count.next = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %count, i32 1) %cmp.1 = icmp ne i32 %count.next, 0 br i1 %cmp.1, label %while.body, label %while.end - + while.end: ret void } @@ -482,10 +482,10 @@ define void @check_negated_reordered_wls(i16* nocapture %a, i16* nocapture readonly %b, i32 %N) { entry: br label %while - + while.body.preheader: br label %while.body - + while.body: %a.addr.06 = phi i16* [ %incdec.ptr1, %while.body ], [ %a, %while.body.preheader ] %b.addr.05 = phi i16* [ %incdec.ptr, %while.body ], [ %b, %while.body.preheader ] diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll @@ -8,6 +8,7 @@ ; CHECK-NEXT: itt eq ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: add.w r12, r3, #3 @@ -18,7 +19,7 @@ ; CHECK-NEXT: add.w lr, lr, r12, lsr #2 ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB0_1: @ %vector.body +; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: and r4, r12, #15 @@ -37,8 +38,8 @@ ; CHECK-NEXT: vmul.i32 q1, q1, q2 ; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vadd.i32 q1, q1, q0 -; CHECK-NEXT: le lr, .LBB0_1 -; CHECK-NEXT: @ %bb.2: @ %middle.block +; CHECK-NEXT: le lr, .LBB0_2 +; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: add sp, #4 @@ -406,9 +407,10 @@ ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB4_1: @ %bb3 ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dlstp.32 lr, r2 -; CHECK-NEXT: .LBB4_1: @ %bb9 +; CHECK-NEXT: .LBB4_2: @ %bb9 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 @@ -418,8 +420,8 @@ ; CHECK-NEXT: vmul.i32 q0, q1, q0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q0, [r0], #16 -; CHECK-NEXT: letp lr, .LBB4_1 -; CHECK-NEXT: @ %bb.2: @ %bb27 +; CHECK-NEXT: letp lr, .LBB4_2 +; CHECK-NEXT: @ %bb.3: @ %bb27 ; CHECK-NEXT: pop {r7, pc} bb: %tmp = icmp eq i32 %arg2, 0 @@ -468,9 +470,10 @@ ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB5_1: @ %bb4 ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 -; CHECK-NEXT: .LBB5_1: @ %bb12 +; CHECK-NEXT: .LBB5_2: @ %bb12 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0] ; CHECK-NEXT: vptt.i32 ne, q0, zr @@ -480,8 +483,8 @@ ; CHECK-NEXT: vmul.i32 q0, q1, q0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q0, [r0], #16 -; CHECK-NEXT: letp lr, .LBB5_1 -; CHECK-NEXT: @ %bb.2: @ %bb32 +; CHECK-NEXT: letp lr, .LBB5_2 +; CHECK-NEXT: @ %bb.3: @ %bb32 ; CHECK-NEXT: pop {r7, pc} bb: %tmp = icmp eq i32 %arg3, 0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/ctlz-non-zeros.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/ctlz-non-zeros.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/ctlz-non-zeros.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/ctlz-non-zeros.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s # CHECK-NOT: LETP @@ -7,11 +8,11 @@ entry: %cmp = icmp slt i32 %elts, 1 br i1 %cmp, label %exit, label %loop.ph - + loop.ph: ; preds = %entry call void @llvm.set.loop.iterations.i32(i32 %iters) br label %loop.body - + loop.body: ; preds = %loop.body, %loop.ph %lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %iters, %loop.ph ] %count = phi i32 [ %elts, %loop.ph ], [ %elts.rem, %loop.body ] @@ -34,20 +35,20 @@ %end = icmp ne i32 %loop.dec, 0 %lsr.iv.next = add i32 %lsr.iv, -1 br i1 %end, label %loop.body, label %exit - + exit: ; preds = %loop.body, %entry ret void } - + define arm_aapcs_vfpcc void @test_ctlz_i16(<4 x i32>* %a, <4 x i32>* %b, <4 x i32>* %c, i32 %elts, i32 %iters) #0 { entry: %cmp = icmp slt i32 %elts, 1 br i1 %cmp, label %exit, label %loop.ph - + loop.ph: ; preds = %entry call void @llvm.set.loop.iterations.i32(i32 %iters) br label %loop.body - + loop.body: ; preds = %loop.body, %loop.ph %lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %iters, %loop.ph ] %count = phi i32 [ %elts, %loop.ph ], [ %elts.rem, %loop.body ] @@ -70,20 +71,20 @@ %end = icmp ne i32 %loop.dec, 0 %lsr.iv.next = add i32 %lsr.iv, -1 br i1 %end, label %loop.body, label %exit - + exit: ; preds = %loop.body, %entry ret void } - + define arm_aapcs_vfpcc void @test_ctlz_i32(<4 x i32>* %a, <4 x i32>* %b, <4 x i32>* %c, i32 %elts, i32 %iters) #0 { entry: %cmp = icmp slt i32 %elts, 1 br i1 %cmp, label %exit, label %loop.ph - + loop.ph: ; preds = %entry call void @llvm.set.loop.iterations.i32(i32 %iters) br label %loop.body - + loop.body: ; preds = %loop.body, %loop.ph %lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %iters, %loop.ph ] %count = phi i32 [ %elts, %loop.ph ], [ %elts.rem, %loop.body ] @@ -106,11 +107,11 @@ %end = icmp ne i32 %loop.dec, 0 %lsr.iv.next = add i32 %lsr.iv, -1 br i1 %end, label %loop.body, label %exit - + exit: ; preds = %loop.body, %entry ret void } - + declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1 immarg) declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1 immarg) declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1 immarg) @@ -141,24 +142,59 @@ offsetAdjustment: 0 maxAlignment: 4 fixedStack: - - { id: 0, type: default, offset: 0, size: 4, alignment: 8, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + - { id: 0, type: default, offset: 0, size: 4, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } stack: - - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } callSites: [] constants: [] machineFunctionInfo: {} body: | + ; CHECK-LABEL: name: test_ctlz_i8 + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r4 + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8 + ; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 11, 8, implicit-def $itstate + ; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + ; CHECK: bb.1.loop.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 + ; CHECK: renamable $r12 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + ; CHECK: dead $lr = t2DLS renamable $r12 + ; CHECK: $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg + ; CHECK: bb.2.loop.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4 + ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg + ; CHECK: MVE_VPST 4, implicit $vpr + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 2) + ; CHECK: renamable $q1 = MVE_VLDRHU16 killed renamable $r0, 0, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 2) + ; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg + ; CHECK: renamable $r4, dead $cpsr = tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg + ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14 /* CC::al */, $noreg + ; CHECK: renamable $q1 = MVE_VCLZs8 killed renamable $q1, 0, $noreg, undef renamable $q1 + ; CHECK: $r0 = tMOVr $r1, 14 /* CC::al */, $noreg + ; CHECK: renamable $q1 = MVE_VQSHRUNs16th killed renamable $q1, killed renamable $q0, 1, 0, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 2) + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.exit: + ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: successors: %bb.1(0x80000000) liveins: $r0, $r1, $r2, $r3, $r4, $lr - + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -166,14 +202,19 @@ tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + + bb.1.loop.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r4, $lr + renamable $r12 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) t2DoLoopStart renamable $r12 $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg - - bb.1.loop.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + + bb.2.loop.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $r0, $r1, $r2, $r3, $r4 - + renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg MVE_VPST 4, implicit $vpr renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 2) @@ -187,10 +228,10 @@ renamable $q1 = MVE_VQSHRUNs16th killed renamable $q1, killed renamable $q0, 1, 0, $noreg MVE_VPST 8, implicit $vpr renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 2) - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg - - bb.2.exit: + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg + + bb.3.exit: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc ... @@ -209,24 +250,58 @@ offsetAdjustment: 0 maxAlignment: 4 fixedStack: - - { id: 0, type: default, offset: 0, size: 4, alignment: 8, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + - { id: 0, type: default, offset: 0, size: 4, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } stack: - - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } callSites: [] constants: [] machineFunctionInfo: {} body: | + ; CHECK-LABEL: name: test_ctlz_i16 + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r4 + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8 + ; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 11, 8, implicit-def $itstate + ; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + ; CHECK: bb.1.loop.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 + ; CHECK: renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + ; CHECK: dead $lr = t2DLS renamable $r4 + ; CHECK: $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg + ; CHECK: bb.2.loop.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3, $r12 + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg + ; CHECK: $lr = tMOVr $r12, 14 /* CC::al */, $noreg + ; CHECK: MVE_VPST 4, implicit $vpr + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4) + ; CHECK: renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg + ; CHECK: renamable $q1 = MVE_VCLZs16 killed renamable $q1, 0, $noreg, undef renamable $q1 + ; CHECK: renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4) + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.exit: + ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: successors: %bb.1(0x80000000) liveins: $r0, $r1, $r2, $r3, $r4, $lr - + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -234,14 +309,19 @@ tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + + bb.1.loop.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r4, $lr + renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) t2DoLoopStart renamable $r4 $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg - - bb.1.loop.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + + bb.2.loop.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $r0, $r1, $r2, $r3, $r12 - + renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg $lr = tMOVr $r12, 14 /* CC::al */, $noreg MVE_VPST 4, implicit $vpr @@ -254,10 +334,10 @@ renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg MVE_VPST 8, implicit $vpr renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4) - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg - - bb.2.exit: + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg + + bb.3.exit: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc ... @@ -276,24 +356,58 @@ offsetAdjustment: 0 maxAlignment: 4 fixedStack: - - { id: 0, type: default, offset: 0, size: 4, alignment: 8, stack-id: default, - isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + - { id: 0, type: default, offset: 0, size: 4, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } stack: - - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } callSites: [] constants: [] machineFunctionInfo: {} body: | + ; CHECK-LABEL: name: test_ctlz_i32 + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r4 + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8 + ; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 11, 8, implicit-def $itstate + ; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + ; CHECK: bb.1.loop.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 + ; CHECK: renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + ; CHECK: dead $lr = t2DLS renamable $r4 + ; CHECK: $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg + ; CHECK: bb.2.loop.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3, $r12 + ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg + ; CHECK: $lr = tMOVr $r12, 14 /* CC::al */, $noreg + ; CHECK: MVE_VPST 4, implicit $vpr + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4) + ; CHECK: renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4) + ; CHECK: renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg + ; CHECK: renamable $q1 = MVE_VCLZs32 killed renamable $q1, 0, $noreg, undef renamable $q1 + ; CHECK: renamable $q0 = MVE_VQSHRUNs32th killed renamable $q0, killed renamable $q1, 3, 0, $noreg + ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4) + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.exit: + ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: successors: %bb.1(0x80000000) liveins: $r0, $r1, $r2, $r3, $r4, $lr - + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -301,14 +415,19 @@ tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + + bb.1.loop.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r4, $lr + renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) t2DoLoopStart renamable $r4 $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg - - bb.1.loop.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + + bb.2.loop.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $r0, $r1, $r2, $r3, $r12 - + renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg $lr = tMOVr $r12, 14 /* CC::al */, $noreg MVE_VPST 4, implicit $vpr @@ -321,10 +440,10 @@ renamable $q0 = MVE_VQSHRUNs32th killed renamable $q0, killed renamable $q1, 3, 0, $noreg MVE_VPST 8, implicit $vpr renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4) - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg - - bb.2.exit: + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg + + bb.3.exit: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir @@ -115,14 +115,17 @@ ; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $r12 = t2ADDri renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $lr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: $lr = t2DLS killed renamable $lr - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr @@ -133,8 +136,8 @@ ; CHECK: MVE_VPST 8, implicit $vpr ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -149,6 +152,11 @@ tCMPi8 renamable $r3, 1, 14, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate tPOP_RET 11, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r7, $lr + renamable $r12 = t2ADDri renamable $r3, 3, 14, $noreg, $noreg renamable $lr = t2MOVi 1, 14, $noreg, $noreg renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg @@ -156,8 +164,8 @@ renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg @@ -170,10 +178,10 @@ renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14, $noreg - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: tPOP_RET 14, $noreg, def $r7, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update2.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update2.mir deleted file mode 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update2.mir +++ /dev/null @@ -1,169 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s - ---- | - define dso_local void @CPSR_not_dead(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { - entry: - %cmp8 = icmp sgt i32 %N, 0 - %0 = add i32 %N, 3 - %1 = lshr i32 %0, 2 - %2 = shl nuw i32 %1, 2 - %3 = add i32 %2, -4 - %4 = lshr i32 %3, 2 - %5 = add nuw nsw i32 %4, 1 - br i1 %cmp8, label %vector.ph, label %for.cond.cleanup - - vector.ph: ; preds = %entry - call void @llvm.set.loop.iterations.i32(i32 %5) - br label %vector.body - - vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %5, %vector.ph ] - %lsr.iv17 = phi i32* [ %scevgep18, %vector.body ], [ %A, %vector.ph ] - %lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ] - %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ] - %6 = phi i32 [ %N, %vector.ph ], [ %8, %vector.body ] - %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>* - %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* - %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>* - %7 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %6) - %8 = sub i32 %6, 4 - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %7, <4 x i32> undef) - %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %7, <4 x i32> undef) - %9 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %9, <4 x i32>* %lsr.iv1719, i32 4, <4 x i1> %7) - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 - %scevgep15 = getelementptr i32, i32* %lsr.iv14, i32 4 - %scevgep18 = getelementptr i32, i32* %lsr.iv17, i32 4 - %10 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) - %11 = icmp ne i32 %10, 0 - %lsr.iv.next = add nsw i32 %lsr.iv1, -1 - br i1 %11, label %vector.body, label %for.cond.cleanup - - for.cond.cleanup: ; preds = %vector.body, %entry - ret void - } - declare void @llvm.set.loop.iterations.i32(i32) - declare <4 x i1> @llvm.arm.mve.vctp32(i32) - declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) - -... ---- -name: CPSR_not_dead -alignment: 2 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -registers: [] -liveins: - - { reg: '$r0', virtual-reg: '' } - - { reg: '$r1', virtual-reg: '' } - - { reg: '$r2', virtual-reg: '' } - - { reg: '$r3', virtual-reg: '' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 8 - offsetAdjustment: 0 - maxAlignment: 4 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 0 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: - - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -callSites: [] -constants: [] -machineFunctionInfo: {} -body: | - ; CHECK-LABEL: name: CPSR_not_dead - ; CHECK: bb.0.entry: - ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r4 - ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp - ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 - ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 - ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8 - ; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: t2IT 11, 8, implicit-def $itstate - ; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate - ; CHECK: $lr = MVE_DLSTP_32 renamable $r3 - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) - ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 - ; CHECK: renamable $r3, $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg - ; CHECK: t2IT 11, 8, implicit-def $itstate - ; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv13, align 4) - ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1416, align 4) - ; CHECK: renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1719, align 4) - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: - ; CHECK: t2IT 11, 8, implicit-def dead $itstate - ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc - bb.0.entry: - successors: %bb.1(0x80000000) - liveins: $r0, $r1, $r2, $r3, $r4, $lr - - frame-setup tPUSH 14, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp - frame-setup CFI_INSTRUCTION def_cfa_offset 8 - frame-setup CFI_INSTRUCTION offset $lr, -4 - frame-setup CFI_INSTRUCTION offset $r4, -8 - tCMPi8 renamable $r3, 1, 14, $noreg, implicit-def $cpsr - t2IT 11, 8, implicit-def $itstate - tPOP_RET 11, killed $cpsr, def $r4, def $pc, implicit killed $itstate - renamable $r12 = t2ADDri renamable $r3, 3, 14, $noreg, $noreg - renamable $lr = t2MOVi 1, 14, $noreg, $noreg - renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg - renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg - renamable $r4 = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg - t2DoLoopStart renamable $r4 - $r12 = tMOVr killed $r4, 14, $noreg - - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) - liveins: $r0, $r1, $r2, $r3, $r12 - - renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg - $lr = tMOVr $r12, 14, $noreg - renamable $r12 = nsw t2SUBri killed $r12, 1, 14, $noreg, $noreg - renamable $r3, $cpsr = tSUBi8 killed renamable $r3, 4, 14, $noreg - t2IT 11, 8, implicit-def $itstate - tPOP_RET 11, killed $cpsr, def $r4, def $pc, implicit killed $itstate - MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4) - renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4) - renamable $lr = t2LoopDec killed renamable $lr, 1 - renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - MVE_VPST 8, implicit $vpr - renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg - - bb.2.for.cond.cleanup: - t2IT 11, 8, implicit-def $itstate - tPOP_RET 14, $noreg, def $r4, def $pc - -... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll @@ -5,13 +5,24 @@ define void @foo(%struct.SpeexPreprocessState_* nocapture readonly %st, i16* %x) { ; CHECK-LABEL: foo: ; CHECK: @ %bb.0: @ %entry -; CHECK: dlstp.16 lr, r4 +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: ldrd r12, r4, [r0] +; CHECK-NEXT: ldrd r3, r2, [r0, #8] +; CHECK-NEXT: rsb r12, r12, r4, lsl #1 +; CHECK-NEXT: mov r4, r12 +; CHECK-NEXT: dlstp.16 lr, r4 ; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q0, [r2], #16 ; CHECK-NEXT: vstrh.16 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB0_1 -; CHECK: dlstp.16 lr, r3 +; CHECK-NEXT: @ %bb.2: @ %do.end +; CHECK-NEXT: ldr r3, [r0] +; CHECK-NEXT: ldr r0, [r0, #8] +; CHECK-NEXT: vmov.i16 q0, #0x1800 +; CHECK-NEXT: add.w r0, r0, r12, lsl #1 +; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: .LBB0_3: @ %do.body6 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q1, [r1], #16 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll @@ -8,17 +8,18 @@ ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dlstp.16 lr, r2 -; CHECK-NEXT: .LBB0_1: @ %vector.body +; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r3, #8 ; CHECK-NEXT: vldrb.s16 q0, [r1], #8 ; CHECK-NEXT: vldrh.u16 q1, [r0] ; CHECK-NEXT: vadd.i16 q0, q1, q0 ; CHECK-NEXT: vstrh.16 q0, [r0], #16 -; CHECK-NEXT: letp lr, .LBB0_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB0_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %cmp8 = icmp eq i32 %N, 0 @@ -67,17 +68,18 @@ ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB1_1: @ %vector.ph ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dlstp.16 lr, r2 -; CHECK-NEXT: .LBB1_1: @ %vector.body +; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r3, #8 ; CHECK-NEXT: vldrb.u16 q0, [r1], #8 ; CHECK-NEXT: vldrh.u16 q1, [r0] ; CHECK-NEXT: vadd.i16 q0, q1, q0 ; CHECK-NEXT: vstrh.16 q0, [r0], #16 -; CHECK-NEXT: letp lr, .LBB1_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB1_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %cmp8 = icmp eq i32 %N, 0 @@ -126,17 +128,18 @@ ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB2_1: @ %vector.ph ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dlstp.32 lr, r2 -; CHECK-NEXT: .LBB2_1: @ %vector.body +; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: vldrh.s32 q0, [r1], #8 ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vadd.i32 q0, q1, q0 ; CHECK-NEXT: vstrw.32 q0, [r0], #16 -; CHECK-NEXT: letp lr, .LBB2_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB2_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %cmp6 = icmp eq i32 %N, 0 @@ -185,17 +188,18 @@ ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB3_1: @ %vector.ph ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dlstp.32 lr, r2 -; CHECK-NEXT: .LBB3_1: @ %vector.body +; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: vldrh.u32 q0, [r1], #8 ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vadd.i32 q0, q1, q0 ; CHECK-NEXT: vstrw.32 q0, [r0], #16 -; CHECK-NEXT: letp lr, .LBB3_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB3_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %cmp6 = icmp eq i32 %N, 0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir @@ -111,6 +111,9 @@ ; CHECK: t2IT 0, 4, implicit-def $itstate ; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate ; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r7 ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -123,8 +126,8 @@ ; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: dead $lr = t2DLS renamable $r12 ; CHECK: $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $q0, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr @@ -135,8 +138,8 @@ ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.middle.block: + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.middle.block: ; CHECK: liveins: $q0 ; CHECK: $r0 = VMOVRS killed $s3, 14 /* CC::al */, $noreg, implicit killed $q0 ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit killed $r0 @@ -148,6 +151,11 @@ t2IT 0, 4, implicit-def $itstate renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $lr, $r7 + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -161,8 +169,8 @@ t2DoLoopStart renamable $r12 $r3 = tMOVr killed $r12, 14, $noreg - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $q0, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg @@ -175,10 +183,10 @@ renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.middle.block: + bb.3.middle.block: liveins: $q0 $r0 = VMOVRS killed $s3, 14, $noreg, implicit $q0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-16.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-16.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-16.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-16.mir @@ -105,14 +105,17 @@ ; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $r12 = t2ADDri renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $lr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: $lr = t2DLS killed renamable $lr - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr @@ -122,8 +125,8 @@ ; CHECK: renamable $q0 = nsw MVE_VADDi16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VPST 8, implicit $vpr ; CHECK: renamable $r0 = MVE_VSTRHU16_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -136,6 +139,11 @@ tCMPi8 renamable $r3, 1, 14, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate tPOP_RET 11, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r7, $lr + renamable $r12 = t2ADDri renamable $r3, 3, 14, $noreg, $noreg renamable $lr = t2MOVi 1, 14, $noreg, $noreg renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg @@ -143,8 +151,8 @@ renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg @@ -156,10 +164,10 @@ MVE_VPST 8, implicit $vpr renamable $r0 = MVE_VSTRHU16_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: tPOP_RET 14, $noreg, def $r7, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-32.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-32.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-32.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-32.mir @@ -113,14 +113,17 @@ ; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $r12 = t2ADDri renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $lr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: $lr = t2DLS killed renamable $lr - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr @@ -130,8 +133,8 @@ ; CHECK: renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VPST 8, implicit $vpr ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -144,6 +147,11 @@ tCMPi8 renamable $r3, 1, 14, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate tPOP_RET 11, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r7, $lr + renamable $r12 = t2ADDri renamable $r3, 3, 14, $noreg, $noreg renamable $lr = t2MOVi 1, 14, $noreg, $noreg renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg @@ -151,8 +159,8 @@ renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg @@ -164,10 +172,10 @@ MVE_VPST 8, implicit $vpr renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: tPOP_RET 14, $noreg, def $r7, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-8.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-8.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-8.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-8.mir @@ -106,14 +106,17 @@ ; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $r12 = t2ADDri renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $lr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: $lr = t2DLS killed renamable $lr - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP8 renamable $r3, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr @@ -123,8 +126,8 @@ ; CHECK: renamable $q0 = nsw MVE_VADDi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VPST 8, implicit $vpr ; CHECK: renamable $r0 = MVE_VSTRBU8_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -137,6 +140,11 @@ tCMPi8 renamable $r3, 1, 14, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate tPOP_RET 11, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r7, $lr + renamable $r12 = t2ADDri renamable $r3, 3, 14, $noreg, $noreg renamable $lr = t2MOVi 1, 14, $noreg, $noreg renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg @@ -144,8 +152,8 @@ renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP8 renamable $r3, 0, $noreg @@ -157,10 +165,10 @@ MVE_VPST 8, implicit $vpr renamable $r0 = MVE_VSTRBU8_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: tPOP_RET 14, $noreg, def $r7, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir @@ -98,24 +98,27 @@ ; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 8, implicit-def $itstate ; CHECK: frame-destroy tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $q2 = MVE_VMOVimmi32 4, 0, $noreg, undef renamable $q2 ; CHECK: renamable $r3 = tLEApcrel %const.0, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from constant-pool) ; CHECK: renamable $r3, dead $cpsr = tLSRri renamable $r2, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2 - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $q1, $q2, $r0, $r1 ; CHECK: renamable $vpr = MVE_VCMPu32 renamable $q1, renamable $q0, 8, 0, killed $noreg ; CHECK: MVE_VPST 4, implicit $vpr ; CHECK: renamable $r1, renamable $q3 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv35, align 4) ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q3, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv12, align 4) ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q0, renamable $q2, 0, $noreg, undef renamable $q0 - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc - ; CHECK: bb.3 (align 16): + ; CHECK: bb.4 (align 16): ; CHECK: CONSTPOOL_ENTRY 0, %const.0, 16 bb.0.entry: successors: %bb.1(0x80000000) @@ -130,6 +133,11 @@ tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate frame-destroy tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg renamable $q2 = MVE_VMOVimmi32 4, 0, $noreg, undef renamable $q2 renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg @@ -142,8 +150,8 @@ renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $q0, $q1, $q2, $r0, $r1, $r2 renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg @@ -154,13 +162,13 @@ renamable $r0 = MVE_VSTRWU32_post killed renamable $q3, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv12, align 4) renamable $q0 = MVE_VADDi32 killed renamable $q0, renamable $q2, 0, $noreg, undef renamable $q0 renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc - bb.3 (align 16): + bb.4 (align 16): CONSTPOOL_ENTRY 0, %const.0, 16 ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s ; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-GLOBAL @@ -16,11 +17,11 @@ ; CHECK: body: ; CHECK: bb.0.entry: ; CHECK: t2CMPri renamable $lr, 0 -; CHECK: tBcc %bb.3 -; CHECK: bb.1.while.body.preheader: +; CHECK: tBcc %bb.4 +; CHECK: bb.2.while.body.preheader: ; CHECK: $lr = t2DLS killed renamable $lr -; CHECK: bb.2.while.body: -; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 +; CHECK: bb.3.while.body: +; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3 define void @ne_and_guard(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { entry: %brmerge.demorgan = and i1 %t1, %t2 @@ -49,11 +50,11 @@ ; CHECK: body: ; CHECK: bb.0.entry: ; CHECK: t2CMPri renamable $lr, 0 -; CHECK: tBcc %bb.3 -; CHECK: bb.1.while.body.preheader: +; CHECK: tBcc %bb.4 +; CHECK: bb.2.while.body.preheader: ; CHECK: $lr = t2DLS killed renamable $lr -; CHECK: bb.2.while.body: -; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 +; CHECK: bb.3.while.body: +; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3 define void @ne_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { entry: %brmerge.demorgan = and i1 %t1, %t2 @@ -84,11 +85,11 @@ ; CHECK: body: ; CHECK: bb.0.entry: ; CHECK: t2CMPri renamable $lr, 0 -; CHECK: tBcc %bb.3 -; CHECK: bb.1.while.body.preheader: +; CHECK: tBcc %bb.4 +; CHECK: bb.2.while.body.preheader: ; CHECK: $lr = t2DLS killed renamable $lr -; CHECK: bb.2.while.body: -; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 +; CHECK: bb.3.while.body: +; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3 define void @eq_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { entry: %brmerge.demorgan = and i1 %t1, %t2 @@ -119,11 +120,11 @@ ; CHECK: body: ; CHECK: bb.0.entry: ; CHECK: t2CMPri renamable $lr, 0 -; CHECK: tBcc %bb.3 -; CHECK: bb.1.while.body.preheader: +; CHECK: tBcc %bb.4 +; CHECK: bb.2.while.body.preheader: ; CHECK: $lr = t2DLS killed renamable $lr -; CHECK: bb.2.while.body: -; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 +; CHECK: bb.3.while.body: +; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3 define void @ne_prepreheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { entry: %cmp = icmp ne i32 %N, 0 @@ -153,8 +154,8 @@ ; CHECK: body: ; CHECK: bb.0.entry: ; CHECK: $lr = t2DLS killed renamable $lr -; CHECK: bb.1.do.body: -; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 +; CHECK: bb.2.do.body: +; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 define void @be_ne(i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { entry: %cmp = icmp ne i32 %N, 0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/massive.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/massive.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/massive.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/massive.mir @@ -1,29 +1,27 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=armv8.1m.main -mattr=+lob -run-pass=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s -# CHECK: for.body: -# CHECK-NOT: t2DLS -# CHECK-NOT: t2LEUpdate --- | ; ModuleID = 'massive.ll' source_filename = "massive.ll" target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main" - + define dso_local arm_aapcscc void @massive(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { entry: %cmp8 = icmp eq i32 %N, 0 br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader - + for.body.preheader: ; preds = %entry %scevgep = getelementptr i32, i32* %a, i32 -1 %scevgep4 = getelementptr i32, i32* %c, i32 -1 %scevgep8 = getelementptr i32, i32* %b, i32 -1 call void @llvm.set.loop.iterations.i32(i32 %N) br label %for.body - + for.cond.cleanup: ; preds = %for.body, %entry ret void - + for.body: ; preds = %for.body, %for.body.preheader %lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ] %lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ] @@ -44,19 +42,19 @@ %4 = icmp ne i32 %3, 0 br i1 %4, label %for.body, label %for.cond.cleanup } - + ; Function Attrs: nounwind declare i32 @llvm.arm.space(i32 immarg, i32) #0 - + ; Function Attrs: noduplicate nounwind declare void @llvm.set.loop.iterations.i32(i32) #1 - + ; Function Attrs: noduplicate nounwind declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1 - + ; Function Attrs: nounwind declare void @llvm.stackprotector(i8*, i8**) #0 - + attributes #0 = { nounwind } attributes #1 = { noduplicate nounwind } @@ -98,20 +96,51 @@ restorePoint: '' fixedStack: [] stack: - - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } callSites: [] constants: [] machineFunctionInfo: {} body: | + ; CHECK-LABEL: name: massive + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r7 + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: tCMPi8 $r3, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 0, 8, implicit-def $itstate + ; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.for.body.preheader: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 + ; CHECK: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg + ; CHECK: renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14 /* CC::al */, $noreg + ; CHECK: $lr = tMOVr killed $r3, 14 /* CC::al */, $noreg + ; CHECK: bb.2.for.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2 + ; CHECK: dead renamable $r3 = SPACE 4096, undef renamable $r0 + ; CHECK: renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep3) + ; CHECK: renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep7) + ; CHECK: renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14 /* CC::al */, $noreg + ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep11) + ; CHECK: $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr + ; CHECK: t2Bcc %bb.2, 1 /* CC::ne */, killed $cpsr + ; CHECK: tB %bb.3, 14 /* CC::al */, $noreg + ; CHECK: bb.3.for.cond.cleanup: + ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: successors: %bb.1(0x80000000) liveins: $r0, $r1, $r2, $r3, $r7, $lr - + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -119,26 +148,31 @@ tCMPi8 $r3, 0, 14, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tPOP_RET 0, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.for.body.preheader: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r7, $lr + renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg $lr = tMOVr $r3, 14, $noreg t2DoLoopStart killed $r3 - - bb.1.for.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + + bb.2.for.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2 - + dead renamable $r3 = SPACE 4096, undef renamable $r0 renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep3) renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7) renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14, $noreg early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep11) renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg - - bb.2.for.cond.cleanup: + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg + + bb.3.for.cond.cleanup: tPOP_RET 14, $noreg, def $r7, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-lr-terminator.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-lr-terminator.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-lr-terminator.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-lr-terminator.mir @@ -109,11 +109,14 @@ ; CHECK: t2CMPrs killed renamable $r12, renamable $r3, 11, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 8, implicit-def $itstate ; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $r12 = t2LSRri killed renamable $r3, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12 - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep45, align 1) @@ -122,8 +125,8 @@ ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep23, align 1) ; CHECK: renamable $q0 = nuw nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1, align 4) - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -137,6 +140,11 @@ t2CMPrs killed renamable $r12, renamable $r3, 11, 14, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tPOP_RET 0, killed $cpsr, def $r4, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r4, $lr + renamable $r12 = t2MOVi 3, 14, $noreg, $noreg renamable $lr = t2MOVi 1, 14, $noreg, $noreg renamable $r12 = nuw t2ADDrs killed renamable $r12, renamable $r3, 11, 14, $noreg, $noreg @@ -148,8 +156,8 @@ t2DoLoopStart renamable $r5 $lr = tMOVr killed $r5, 14, $noreg - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3, $r12 renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14, $noreg @@ -165,10 +173,10 @@ MVE_VPST 8, implicit $vpr renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: tPOP_RET 14, $noreg, def $r4, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll @@ -2,8 +2,22 @@ ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float* nocapture %pResult) { -; CHECK-LABEL: .LBB0_1: @ %do.body.i -; CHECK: dlstp.32 lr, r1 +; CHECK-LABEL: arm_var_f32_mve: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: vmov.i32 q0, #0x0 +; CHECK-NEXT: mov r3, r1 +; CHECK-NEXT: mov r12, r0 +; CHECK-NEXT: dlstp.32 lr, r3 +; CHECK-NEXT: .LBB0_1: @ %do.body.i +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q1, [r12], #16 +; CHECK-NEXT: vadd.f32 q0, q0, q1 +; CHECK-NEXT: letp lr, .LBB0_1 +; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit +; CHECK-NEXT: vmov s4, r1 +; CHECK-NEXT: dlstp.32 lr, r1 ; CHECK-NEXT: vadd.f32 s0, s3, s3 ; CHECK-NEXT: vcvt.f32.u32 s4, s4 ; CHECK-NEXT: vdiv.f32 s0, s0, s4 @@ -18,6 +32,14 @@ ; CHECK-NEXT: vsub.f32 q2, q2, q1 ; CHECK-NEXT: vfma.f32 q0, q2, q2 ; CHECK-NEXT: letp lr, .LBB0_3 +; CHECK-NEXT: @ %bb.4: @ %do.end +; CHECK-NEXT: subs r0, r1, #1 +; CHECK-NEXT: vadd.f32 s0, s3, s3 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vcvt.f32.u32 s2, s2 +; CHECK-NEXT: vdiv.f32 s0, s0, s2 +; CHECK-NEXT: vstr s0, [r2] +; CHECK-NEXT: pop {r4, pc} entry: br label %do.body.i diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir @@ -114,12 +114,15 @@ ; CHECK: t2CMPrs killed renamable $r12, renamable $r3, 11, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 8, implicit-def $itstate ; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: $r12 = t2MOVr killed $r3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2LSRri killed renamable $r12, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12 ; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep45, align 1) @@ -128,8 +131,8 @@ ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep23, align 1) ; CHECK: renamable $q0 = nuw nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1, align 4) - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -143,6 +146,11 @@ t2CMPrs killed renamable $r12, renamable $r3, 11, 14, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tPOP_RET 0, killed $cpsr, def $r4, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r4, $lr + renamable $r12 = t2MOVi 3, 14, $noreg, $noreg renamable $lr = t2MOVi 1, 14, $noreg, $noreg renamable $r12 = nuw t2ADDrs killed renamable $r12, renamable $r3, 11, 14, $noreg, $noreg @@ -154,8 +162,8 @@ renamable $r3, dead $cpsr = tMOVi8 0, 14, $noreg renamable $r12 = t2LSRri killed renamable $r12, 1, 14, $noreg, $noreg - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3, $r12 renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14, $noreg @@ -171,10 +179,10 @@ MVE_VPST 8, implicit $vpr renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: tPOP_RET 14, $noreg, def $r4, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir @@ -114,6 +114,9 @@ ; CHECK: t2CMPrs killed renamable $r12, renamable $r3, 11, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 8, implicit-def $itstate ; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $r12 = t2MOVi 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = nuw t2ADDrs killed renamable $r12, renamable $r3, 11, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg @@ -122,8 +125,8 @@ ; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2LSRri killed renamable $r12, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12 - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep45, align 1) @@ -132,8 +135,8 @@ ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep23, align 1) ; CHECK: renamable $q0 = nuw nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1, align 4) - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -147,6 +150,11 @@ t2CMPrs killed renamable $r12, renamable $r3, 11, 14, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tPOP_RET 0, killed $cpsr, def $r4, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r4, $lr + renamable $r12 = t2MOVi 3, 14, $noreg, $noreg renamable $lr = t2MOVi 1, 14, $noreg, $noreg renamable $r12 = nuw t2ADDrs killed renamable $r12, renamable $r3, 11, 14, $noreg, $noreg @@ -158,8 +166,8 @@ renamable $r3, dead $cpsr = tMOVi8 0, 14, $noreg renamable $r12 = t2LSRri killed renamable $r12, 1, 14, $noreg, $noreg - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3, $r12 renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14, $noreg @@ -175,10 +183,10 @@ MVE_VPST 8, implicit $vpr renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: tPOP_RET 14, $noreg, def $r4, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-cond-iter-count.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-cond-iter-count.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-cond-iter-count.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-cond-iter-count.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s --- | define dso_local arm_aapcs_vfpcc void @multi_cond_iter_count(i32* noalias nocapture %0, i32* nocapture readonly %1, i32 %2, i32 %3) { @@ -96,18 +96,21 @@ ; CHECK: renamable $r2 = t2LSLrr killed renamable $r2, killed renamable $r12, 14 /* CC::al */, $noreg, def $cpsr ; CHECK: t2IT 0, 8, implicit-def $itstate ; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1 (%ir-block.17): + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: $r3 = tMOVr $r0, 14 /* CC::al */, $noreg ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2 - ; CHECK: bb.1 (%ir-block.18): - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2 (%ir-block.18): + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r3 ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg ; CHECK: renamable $r3, renamable $q1 = MVE_VLDRWU32_post killed renamable $r3, 16, 0, $noreg ; CHECK: renamable $q0 = nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 0, killed $noreg ; CHECK: $r0 = tMOVr $r3, 14 /* CC::al */, $noreg - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 - ; CHECK: bb.2 (%ir-block.34): + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3 (%ir-block.34): ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0 (%ir-block.4): successors: %bb.1(0x80000000) @@ -129,6 +132,11 @@ renamable $r2 = t2LSLrr killed renamable $r2, killed renamable $r12, 14, $noreg, def $cpsr t2IT 0, 8, implicit-def $itstate tPOP_RET 0, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1 (%ir-block.17): + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14, $noreg renamable $r3 = t2BICri killed renamable $r3, 3, 14, $noreg, $noreg renamable $r12 = t2SUBri killed renamable $r3, 4, 14, $noreg, $noreg @@ -137,8 +145,8 @@ $r3 = tMOVr $r0, 14, $noreg t2DoLoopStart renamable $lr - bb.1 (%ir-block.18): - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2 (%ir-block.18): + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg @@ -151,10 +159,10 @@ MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr renamable $lr = t2LoopDec killed renamable $lr, 1 $r0 = tMOVr $r3, 14, $noreg - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2 (%ir-block.34): + bb.3 (%ir-block.34): tPOP_RET 14, $noreg, def $r7, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiblock-massive.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiblock-massive.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiblock-massive.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiblock-massive.mir @@ -1,26 +1,19 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=armv8.1m.main -mattr=+lob -run-pass=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s -# CHECK: for.body: -# CHECK-NOT: t2DLS -# CHECK-NOT: t2LEUpdate --- | - ; ModuleID = 'multiblock-massive.ll' - source_filename = "multiblock-massive.ll" - target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" - target triple = "thumbv8.1m.main" - define void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { entry: %cmp8 = icmp eq i32 %N, 0 br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader - + for.body.preheader: ; preds = %entry call void @llvm.set.loop.iterations.i32(i32 %N) br label %for.body - + for.cond.cleanup: ; preds = %for.end, %entry ret void - + for.body: ; preds = %for.end, %for.body.preheader %lsr.iv4 = phi i32* [ %b, %for.body.preheader ], [ %scevgep5, %for.end ] %lsr.iv2 = phi i32* [ %c, %for.body.preheader ], [ %scevgep3, %for.end ] @@ -33,13 +26,13 @@ store i32 %mul, i32* %lsr.iv1, align 4 %cmp = icmp ne i32 %0, 0 br i1 %cmp, label %middle.block, label %for.end - + middle.block: ; preds = %for.body %div = udiv i32 %1, %0 store i32 %div, i32* %lsr.iv1, align 4 %size.1 = call i32 @llvm.arm.space(i32 1024, i32 undef) br label %for.end - + for.end: ; preds = %middle.block, %for.body %scevgep = getelementptr i32, i32* %lsr.iv1, i32 1 %scevgep3 = getelementptr i32, i32* %lsr.iv2, i32 1 @@ -48,19 +41,19 @@ %exitcond = icmp eq i32 %lsr.iv.next, 0 br i1 %exitcond, label %for.cond.cleanup, label %for.body } - + ; Function Attrs: nounwind declare i32 @llvm.arm.space(i32 immarg, i32) #0 - + ; Function Attrs: noduplicate nounwind declare void @llvm.set.loop.iterations.i32(i32) #1 - + ; Function Attrs: noduplicate nounwind declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1 - + ; Function Attrs: nounwind declare void @llvm.stackprotector(i8*, i8**) #0 - + attributes #0 = { nounwind } attributes #1 = { noduplicate nounwind } @@ -102,20 +95,64 @@ restorePoint: '' fixedStack: [] stack: - - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } callSites: [] constants: [] machineFunctionInfo: {} body: | + ; CHECK-LABEL: name: size_limit + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r4 + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8 + ; CHECK: tCMPi8 $r3, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 0, 8, implicit-def $itstate + ; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + ; CHECK: bb.1.for.body.preheader: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 + ; CHECK: $lr = tMOVr killed $r3, 14 /* CC::al */, $noreg + ; CHECK: tB %bb.2, 14 /* CC::al */, $noreg + ; CHECK: bb.2.for.end: + ; CHECK: successors: %bb.5(0x04000000), %bb.3(0x7c000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2 + ; CHECK: renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + ; CHECK: renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg + ; CHECK: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 4, 14 /* CC::al */, $noreg + ; CHECK: $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr + ; CHECK: tBcc %bb.3, 1 /* CC::ne */, killed $cpsr + ; CHECK: t2B %bb.5, 14 /* CC::al */, $noreg + ; CHECK: bb.3.for.body: + ; CHECK: successors: %bb.4(0x50000000), %bb.2(0x30000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2 + ; CHECK: dead renamable $r3 = SPACE 3072, undef renamable $r0 + ; CHECK: renamable $r3 = tLDRi renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.lsr.iv4) + ; CHECK: renamable $r12 = t2LDRi12 renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.lsr.iv2) + ; CHECK: tCMPi8 renamable $r3, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: renamable $r4 = nsw t2MUL renamable $r12, renamable $r3, 14 /* CC::al */, $noreg + ; CHECK: tSTRi killed renamable $r4, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.lsr.iv1) + ; CHECK: t2Bcc %bb.2, 0 /* CC::eq */, killed $cpsr + ; CHECK: bb.4.middle.block: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r12 + ; CHECK: renamable $r3 = t2UDIV killed renamable $r12, killed renamable $r3, 14 /* CC::al */, $noreg + ; CHECK: tSTRi killed renamable $r3, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.lsr.iv1) + ; CHECK: dead renamable $r3 = SPACE 1024, undef renamable $r0 + ; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg + ; CHECK: bb.5.for.cond.cleanup: + ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: - successors: %bb.2(0x80000000) + successors: %bb.1(0x80000000) liveins: $r0, $r1, $r2, $r3, $r4, $lr - + frame-setup tPUSH 14, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -123,43 +160,48 @@ tCMPi8 $r3, 0, 14, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tPOP_RET 0, killed $cpsr, def $r4, def $pc, implicit killed $itstate + + bb.1.for.body.preheader: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r4, $lr + $lr = tMOVr $r3, 14, $noreg t2DoLoopStart killed $r3 tB %bb.2, 14, $noreg - - bb.1.for.end: - successors: %bb.4(0x04000000), %bb.2(0x7c000000) + + bb.2.for.end: + successors: %bb.5(0x04000000), %bb.3(0x7c000000) liveins: $lr, $r0, $r1, $r2 - + renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 4, 14, $noreg renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 4, 14, $noreg renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 4, 14, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr - t2B %bb.4, 14, $noreg - - bb.2.for.body: - successors: %bb.3(0x50000000), %bb.1(0x30000000) + t2LoopEnd renamable $lr, %bb.3, implicit-def dead $cpsr + t2B %bb.5, 14, $noreg + + bb.3.for.body: + successors: %bb.4(0x50000000), %bb.2(0x30000000) liveins: $lr, $r0, $r1, $r2 - + dead renamable $r3 = SPACE 3072, undef renamable $r0 renamable $r3 = tLDRi renamable $r1, 0, 14, $noreg :: (load 4 from %ir.lsr.iv4) renamable $r12 = t2LDRi12 renamable $r2, 0, 14, $noreg :: (load 4 from %ir.lsr.iv2) tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr renamable $r4 = nsw t2MUL renamable $r12, renamable $r3, 14, $noreg tSTRi killed renamable $r4, renamable $r0, 0, 14, $noreg :: (store 4 into %ir.lsr.iv1) - t2Bcc %bb.1, 0, killed $cpsr - - bb.3.middle.block: - successors: %bb.1(0x80000000) + t2Bcc %bb.2, 0, killed $cpsr + + bb.4.middle.block: + successors: %bb.2(0x80000000) liveins: $lr, $r0, $r1, $r2, $r3, $r12 - + renamable $r3 = t2UDIV killed renamable $r12, killed renamable $r3, 14, $noreg tSTRi killed renamable $r3, renamable $r0, 0, 14, $noreg :: (store 4 into %ir.lsr.iv1) dead renamable $r3 = SPACE 1024, undef renamable $r0 - t2B %bb.1, 14, $noreg - - bb.4.for.cond.cleanup: + t2B %bb.2, 14, $noreg + + bb.5.for.cond.cleanup: tPOP_RET 14, $noreg, def $r4, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -8,6 +8,7 @@ ; CHECK-NEXT: itt eq ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: vmov.i32 q0, #0x0 @@ -17,7 +18,7 @@ ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB0_1: @ %vector.body +; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 ; CHECK-NEXT: adds r3, #4 @@ -26,8 +27,8 @@ ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrbt.u32 q2, [r1], #4 ; CHECK-NEXT: vmla.u32 q0, q2, r0 -; CHECK-NEXT: le lr, .LBB0_1 -; CHECK-NEXT: @ %bb.2: @ %middle.block +; CHECK-NEXT: le lr, .LBB0_2 +; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: pop {r7, pc} @@ -83,6 +84,7 @@ ; CHECK-NEXT: itt eq ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB1_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: vmov.i32 q0, #0x0 @@ -92,7 +94,7 @@ ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB1_1: @ %vector.body +; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 ; CHECK-NEXT: adds r3, #4 @@ -101,8 +103,8 @@ ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrht.s32 q2, [r1], #8 ; CHECK-NEXT: vmla.u32 q0, q2, r0 -; CHECK-NEXT: le lr, .LBB1_1 -; CHECK-NEXT: @ %bb.2: @ %middle.block +; CHECK-NEXT: le lr, .LBB1_2 +; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: pop {r7, pc} @@ -158,6 +160,7 @@ ; CHECK-NEXT: itt eq ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB2_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: vmov.i32 q0, #0x0 @@ -167,7 +170,7 @@ ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB2_1: @ %vector.body +; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 ; CHECK-NEXT: adds r3, #4 @@ -176,8 +179,8 @@ ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrbt.u32 q2, [r1], #4 ; CHECK-NEXT: vmla.u32 q0, q2, r0 -; CHECK-NEXT: le lr, .LBB2_1 -; CHECK-NEXT: @ %bb.2: @ %middle.block +; CHECK-NEXT: le lr, .LBB2_2 +; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: pop {r7, pc} @@ -233,6 +236,7 @@ ; CHECK-NEXT: itt eq ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB3_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: vmov.i32 q0, #0x0 @@ -242,7 +246,7 @@ ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB3_1: @ %vector.body +; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 ; CHECK-NEXT: adds r3, #4 @@ -251,8 +255,8 @@ ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrht.u32 q2, [r1], #8 ; CHECK-NEXT: vmla.u32 q0, q2, r0 -; CHECK-NEXT: le lr, .LBB3_1 -; CHECK-NEXT: @ %bb.2: @ %middle.block +; CHECK-NEXT: le lr, .LBB3_2 +; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: pop {r7, pc} @@ -308,6 +312,7 @@ ; CHECK-NEXT: itt eq ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB4_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: vmov.i32 q0, #0x0 @@ -317,7 +322,7 @@ ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB4_1: @ %vector.body +; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 ; CHECK-NEXT: adds r3, #4 @@ -326,8 +331,8 @@ ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q2, [r1], #16 ; CHECK-NEXT: vmla.u32 q0, q2, r0 -; CHECK-NEXT: le lr, .LBB4_1 -; CHECK-NEXT: @ %bb.2: @ %middle.block +; CHECK-NEXT: le lr, .LBB4_2 +; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: pop {r7, pc} @@ -618,17 +623,18 @@ ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} +; CHECK-NEXT: .LBB6_1: @ %vector.ph ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r12 -; CHECK-NEXT: .LBB6_1: @ %vector.body +; CHECK-NEXT: .LBB6_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrh.s32 q0, [r0], #8 ; CHECK-NEXT: vldrh.s32 q1, [r1], #8 ; CHECK-NEXT: vmlas.u32 q1, q0, r2 ; CHECK-NEXT: vstrw.32 q1, [r3], #16 -; CHECK-NEXT: letp lr, .LBB6_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB6_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} entry: %cmp10 = icmp eq i32 %N, 0 @@ -919,17 +925,18 @@ ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} +; CHECK-NEXT: .LBB8_1: @ %vector.ph ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r12 -; CHECK-NEXT: .LBB8_1: @ %vector.body +; CHECK-NEXT: .LBB8_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrh.u32 q0, [r0], #8 ; CHECK-NEXT: vldrh.u32 q1, [r1], #8 ; CHECK-NEXT: vmlas.u32 q1, q0, r2 ; CHECK-NEXT: vstrw.32 q1, [r3], #16 -; CHECK-NEXT: letp lr, .LBB8_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB8_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} entry: %cmp10 = icmp eq i32 %N, 0 @@ -1206,17 +1213,18 @@ ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB10_1: @ %vector.ph ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.16 lr, r3 -; CHECK-NEXT: .LBB10_1: @ %vector.body +; CHECK-NEXT: .LBB10_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r12, r12, #8 ; CHECK-NEXT: vldrb.u16 q0, [r1], #8 ; CHECK-NEXT: vldrb.u16 q1, [r2], #8 ; CHECK-NEXT: vmul.i16 q0, q1, q0 ; CHECK-NEXT: vstrh.16 q0, [r0], #16 -; CHECK-NEXT: letp lr, .LBB10_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB10_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %cmp10 = icmp eq i32 %N, 0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir @@ -3,14 +3,10 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=-lob %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s --check-prefix=CHECK-NOLOB --- | - target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" - target triple = "thumbv8.1m.main" - %struct.head_s = type { %struct.head_s*, %struct.data_s* } %struct.data_s = type { i16, i16 } - ; Function Attrs: norecurse nounwind readonly - define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr #0 { + define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr { entry: %idx = getelementptr inbounds %struct.data_s, %struct.data_s* %info, i32 0, i32 1 %0 = load i16, i16* %idx, align 2 @@ -67,16 +63,14 @@ br i1 %tobool10, label %return, label %land.rhs11 return: ; preds = %while.body19, %land.rhs11, %while.body, %land.rhs, %while.cond.preheader, %while.cond9.preheader - %retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ %list.addr.033, %land.rhs ], [ null, %while.body ], [ %list.addr.136, %land.rhs11 ], [ null, %while.body19 ] + %retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ null, %while.body ], [ %list.addr.033, %land.rhs ], [ null, %while.body19 ], [ %list.addr.136, %land.rhs11 ] ret %struct.head_s* %retval.0 } - attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+lob,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" } - ... --- name: search -alignment: 1 +alignment: 2 exposesReturnsTwice: false legalized: false regBankSelected: false @@ -115,184 +109,195 @@ body: | ; CHECK-LOB-LABEL: name: search ; CHECK-LOB: bb.0.entry: - ; CHECK-LOB: successors: %bb.1(0x50000000), %bb.4(0x30000000) + ; CHECK-LOB: successors: %bb.1(0x50000000), %bb.5(0x30000000) ; CHECK-LOB: liveins: $r0, $r1 ; CHECK-LOB: renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx) ; CHECK-LOB: t2CMPri renamable $r2, -1, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK-LOB: tBcc %bb.4, 13 /* CC::le */, killed $cpsr + ; CHECK-LOB: tBcc %bb.5, 13 /* CC::le */, killed $cpsr ; CHECK-LOB: bb.1.while.cond.preheader: - ; CHECK-LOB: successors: %bb.8(0x30000000), %bb.2(0x50000000) + ; CHECK-LOB: successors: %bb.9(0x30000000), %bb.2(0x50000000) ; CHECK-LOB: liveins: $r0, $r2 - ; CHECK-LOB: tCBZ $r0, %bb.8 + ; CHECK-LOB: tCBZ $r0, %bb.9 ; CHECK-LOB: bb.2.land.rhs.preheader: ; CHECK-LOB: successors: %bb.3(0x80000000) ; CHECK-LOB: liveins: $r0, $r2 ; CHECK-LOB: renamable $r1 = tUXTH killed renamable $r2, 14 /* CC::al */, $noreg ; CHECK-LOB: bb.3.land.rhs: - ; CHECK-LOB: successors: %bb.8(0x04000000), %bb.3(0x7c000000) + ; CHECK-LOB: successors: %bb.4(0x80000000) ; CHECK-LOB: liveins: $r0, $r1 ; CHECK-LOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info2) ; CHECK-LOB: renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx3) ; CHECK-LOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-LOB: t2IT 0, 8, implicit-def $itstate ; CHECK-LOB: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK-LOB: bb.4.while.body: + ; CHECK-LOB: successors: %bb.9(0x04000000), %bb.3(0x7c000000) + ; CHECK-LOB: liveins: $r0, $r1 ; CHECK-LOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next4) - ; CHECK-LOB: tCBNZ $r0, %bb.8 + ; CHECK-LOB: tCBNZ $r0, %bb.9 ; CHECK-LOB: t2LE %bb.3 - ; CHECK-LOB: bb.4.while.cond9.preheader: - ; CHECK-LOB: successors: %bb.8(0x30000000), %bb.5(0x50000000) + ; CHECK-LOB: bb.5.while.cond9.preheader: + ; CHECK-LOB: successors: %bb.9(0x30000000), %bb.6(0x50000000) ; CHECK-LOB: liveins: $r0, $r1 - ; CHECK-LOB: tCBZ $r0, %bb.8 - ; CHECK-LOB: bb.5.land.rhs11.lr.ph: - ; CHECK-LOB: successors: %bb.6(0x80000000) + ; CHECK-LOB: tCBZ $r0, %bb.9 + ; CHECK-LOB: bb.6.land.rhs11.lr.ph: + ; CHECK-LOB: successors: %bb.7(0x80000000) ; CHECK-LOB: liveins: $r0, $r1 ; CHECK-LOB: renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16143) - ; CHECK-LOB: bb.6.land.rhs11: - ; CHECK-LOB: successors: %bb.9(0x04000000), %bb.7(0x7c000000) + ; CHECK-LOB: bb.7.land.rhs11: + ; CHECK-LOB: successors: %bb.10(0x04000000), %bb.8(0x7c000000) ; CHECK-LOB: liveins: $r0, $r1 ; CHECK-LOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info12) ; CHECK-LOB: renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.data165, align 2) ; CHECK-LOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK-LOB: tBcc %bb.9, 0 /* CC::eq */, killed $cpsr - ; CHECK-LOB: bb.7.while.body19: - ; CHECK-LOB: successors: %bb.8(0x04000000), %bb.6(0x7c000000) + ; CHECK-LOB: tBcc %bb.10, 0 /* CC::eq */, killed $cpsr + ; CHECK-LOB: bb.8.while.body19: + ; CHECK-LOB: successors: %bb.9(0x04000000), %bb.7(0x7c000000) ; CHECK-LOB: liveins: $r0, $r1 ; CHECK-LOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next206) - ; CHECK-LOB: tCBZ $r0, %bb.8 - ; CHECK-LOB: t2LE %bb.6 - ; CHECK-LOB: bb.8: - ; CHECK-LOB: successors: %bb.9(0x80000000) + ; CHECK-LOB: tCBZ $r0, %bb.9 + ; CHECK-LOB: t2LE %bb.7 + ; CHECK-LOB: bb.9: + ; CHECK-LOB: successors: %bb.10(0x80000000) ; CHECK-LOB: renamable $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg - ; CHECK-LOB: bb.9.return: + ; CHECK-LOB: bb.10.return: ; CHECK-LOB: liveins: $r0 ; CHECK-LOB: tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 ; CHECK-NOLOB-LABEL: name: search ; CHECK-NOLOB: bb.0.entry: - ; CHECK-NOLOB: successors: %bb.1(0x50000000), %bb.4(0x30000000) + ; CHECK-NOLOB: successors: %bb.1(0x50000000), %bb.5(0x30000000) ; CHECK-NOLOB: liveins: $r0, $r1 ; CHECK-NOLOB: renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx) ; CHECK-NOLOB: t2CMPri renamable $r2, -1, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK-NOLOB: tBcc %bb.4, 13 /* CC::le */, killed $cpsr + ; CHECK-NOLOB: tBcc %bb.5, 13 /* CC::le */, killed $cpsr ; CHECK-NOLOB: bb.1.while.cond.preheader: - ; CHECK-NOLOB: successors: %bb.8(0x30000000), %bb.2(0x50000000) + ; CHECK-NOLOB: successors: %bb.9(0x30000000), %bb.2(0x50000000) ; CHECK-NOLOB: liveins: $r0, $r2 - ; CHECK-NOLOB: tCBZ $r0, %bb.8 + ; CHECK-NOLOB: tCBZ $r0, %bb.9 ; CHECK-NOLOB: bb.2.land.rhs.preheader: ; CHECK-NOLOB: successors: %bb.3(0x80000000) ; CHECK-NOLOB: liveins: $r0, $r2 ; CHECK-NOLOB: renamable $r1 = tUXTH killed renamable $r2, 14 /* CC::al */, $noreg ; CHECK-NOLOB: bb.3.land.rhs: - ; CHECK-NOLOB: successors: %bb.8(0x04000000), %bb.3(0x7c000000) + ; CHECK-NOLOB: successors: %bb.4(0x80000000) ; CHECK-NOLOB: liveins: $r0, $r1 ; CHECK-NOLOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info2) ; CHECK-NOLOB: renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx3) ; CHECK-NOLOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-NOLOB: t2IT 0, 8, implicit-def $itstate ; CHECK-NOLOB: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK-NOLOB: bb.4.while.body: + ; CHECK-NOLOB: successors: %bb.9(0x04000000), %bb.3(0x7c000000) + ; CHECK-NOLOB: liveins: $r0, $r1 ; CHECK-NOLOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next4) ; CHECK-NOLOB: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-NOLOB: tBcc %bb.3, 0 /* CC::eq */, killed $cpsr - ; CHECK-NOLOB: tB %bb.8, 14 /* CC::al */, $noreg - ; CHECK-NOLOB: bb.4.while.cond9.preheader: - ; CHECK-NOLOB: successors: %bb.8(0x30000000), %bb.5(0x50000000) + ; CHECK-NOLOB: tB %bb.9, 14 /* CC::al */, $noreg + ; CHECK-NOLOB: bb.5.while.cond9.preheader: + ; CHECK-NOLOB: successors: %bb.9(0x30000000), %bb.6(0x50000000) ; CHECK-NOLOB: liveins: $r0, $r1 - ; CHECK-NOLOB: tCBZ $r0, %bb.8 - ; CHECK-NOLOB: bb.5.land.rhs11.lr.ph: - ; CHECK-NOLOB: successors: %bb.6(0x80000000) + ; CHECK-NOLOB: tCBZ $r0, %bb.9 + ; CHECK-NOLOB: bb.6.land.rhs11.lr.ph: + ; CHECK-NOLOB: successors: %bb.7(0x80000000) ; CHECK-NOLOB: liveins: $r0, $r1 ; CHECK-NOLOB: renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16143) - ; CHECK-NOLOB: bb.6.land.rhs11: - ; CHECK-NOLOB: successors: %bb.9(0x04000000), %bb.7(0x7c000000) + ; CHECK-NOLOB: bb.7.land.rhs11: + ; CHECK-NOLOB: successors: %bb.10(0x04000000), %bb.8(0x7c000000) ; CHECK-NOLOB: liveins: $r0, $r1 ; CHECK-NOLOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info12) ; CHECK-NOLOB: renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.data165, align 2) ; CHECK-NOLOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK-NOLOB: tBcc %bb.9, 0 /* CC::eq */, killed $cpsr - ; CHECK-NOLOB: bb.7.while.body19: - ; CHECK-NOLOB: successors: %bb.8(0x04000000), %bb.6(0x7c000000) + ; CHECK-NOLOB: tBcc %bb.10, 0 /* CC::eq */, killed $cpsr + ; CHECK-NOLOB: bb.8.while.body19: + ; CHECK-NOLOB: successors: %bb.9(0x04000000), %bb.7(0x7c000000) ; CHECK-NOLOB: liveins: $r0, $r1 ; CHECK-NOLOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next206) ; CHECK-NOLOB: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK-NOLOB: tBcc %bb.6, 1 /* CC::ne */, killed $cpsr - ; CHECK-NOLOB: bb.8: - ; CHECK-NOLOB: successors: %bb.9(0x80000000) + ; CHECK-NOLOB: tBcc %bb.7, 1 /* CC::ne */, killed $cpsr + ; CHECK-NOLOB: bb.9: + ; CHECK-NOLOB: successors: %bb.10(0x80000000) ; CHECK-NOLOB: renamable $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg - ; CHECK-NOLOB: bb.9.return: + ; CHECK-NOLOB: bb.10.return: ; CHECK-NOLOB: liveins: $r0 ; CHECK-NOLOB: tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 bb.0.entry: successors: %bb.5(0x50000000), %bb.1(0x30000000) liveins: $r0, $r1 - renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14, $noreg :: (load 2 from %ir.idx) - t2CMPri renamable $r2, -1, 14, $noreg, implicit-def $cpsr - t2Bcc %bb.1, 13, killed $cpsr + renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx) + t2CMPri renamable $r2, -1, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2Bcc %bb.1, 13 /* CC::le */, killed $cpsr bb.5.while.cond.preheader: - successors: %bb.8(0x30000000), %bb.6(0x50000000) + successors: %bb.9(0x30000000), %bb.6(0x50000000) liveins: $r0, $r2 - tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr - t2Bcc %bb.8, 0, killed $cpsr + tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2Bcc %bb.9, 0 /* CC::eq */, killed $cpsr bb.6.land.rhs.preheader: successors: %bb.7(0x80000000) liveins: $r0, $r2 - renamable $r1 = tUXTH killed renamable $r2, 14, $noreg + renamable $r1 = tUXTH killed renamable $r2, 14 /* CC::al */, $noreg bb.7.land.rhs: - successors: %bb.8(0x04000000), %bb.7(0x7c000000) + successors: %bb.8(0x80000000) liveins: $r0, $r1 - renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info2) - renamable $r2 = tLDRHi killed renamable $r2, 1, 14, $noreg :: (load 2 from %ir.idx3) - tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr + renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info2) + renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx3) + tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate - tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate - renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next4) - tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr - t2Bcc %bb.7, 0, killed $cpsr - t2B %bb.8, 14, $noreg + tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + + bb.8.while.body: + successors: %bb.9(0x04000000), %bb.7(0x7c000000) + liveins: $r0, $r1 + + renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next4) + tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2Bcc %bb.7, 0 /* CC::eq */, killed $cpsr + t2B %bb.9, 14 /* CC::al */, $noreg bb.1.while.cond9.preheader: - successors: %bb.8(0x30000000), %bb.2(0x50000000) + successors: %bb.9(0x30000000), %bb.2(0x50000000) liveins: $r0, $r1 - tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr - t2Bcc %bb.8, 0, killed $cpsr + tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2Bcc %bb.9, 0 /* CC::eq */, killed $cpsr bb.2.land.rhs11.lr.ph: successors: %bb.3(0x80000000) liveins: $r0, $r1 - renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14, $noreg :: (load 2 from %ir.data16143) + renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16143) bb.3.land.rhs11: - successors: %bb.9(0x04000000), %bb.4(0x7c000000) + successors: %bb.10(0x04000000), %bb.4(0x7c000000) liveins: $r0, $r1 - renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info12) - renamable $r2 = tLDRBi killed renamable $r2, 0, 14, $noreg :: (load 1 from %ir.data165, align 2) - tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr - t2Bcc %bb.9, 0, killed $cpsr + renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info12) + renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.data165, align 2) + tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2Bcc %bb.10, 0 /* CC::eq */, killed $cpsr bb.4.while.body19: - successors: %bb.8(0x04000000), %bb.3(0x7c000000) + successors: %bb.9(0x04000000), %bb.3(0x7c000000) liveins: $r0, $r1 - renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next206) - tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr - t2Bcc %bb.3, 1, killed $cpsr + renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next206) + tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2Bcc %bb.3, 1 /* CC::ne */, killed $cpsr - bb.8: - successors: %bb.9(0x80000000) + bb.9: + successors: %bb.10(0x80000000) - renamable $r0, dead $cpsr = tMOVi8 0, 14, $noreg + renamable $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg - bb.9.return: + bb.10.return: liveins: $r0 - tBX_RET 14, $noreg, implicit killed $r0 + tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir @@ -1,44 +1,40 @@ -# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s -# CHECK-NOT: t2LE +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-cp-islands %s -o - --verify-machineinstrs | FileCheck %s --- | - target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" - target triple = "thumbv8.1m.main-unknown-unknown" - %struct.head_s = type { %struct.head_s*, %struct.data_s* } %struct.data_s = type { i16, i16 } - - ; Function Attrs: norecurse nounwind readonly - define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr #0 { + + define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr { entry: %idx = getelementptr inbounds %struct.data_s, %struct.data_s* %info, i32 0, i32 1 %tmp = load i16, i16* %idx, align 2 %cmp = icmp sgt i16 %tmp, -1 br i1 %cmp, label %while.cond.preheader, label %while.cond9.preheader - + while.cond9.preheader: ; preds = %entry %0 = icmp eq %struct.head_s* %list, null br i1 %0, label %return, label %land.rhs11.lr.ph - + land.rhs11.lr.ph: ; preds = %while.cond9.preheader %data16143 = bitcast %struct.data_s* %info to i16* %tmp1 = load i16, i16* %data16143, align 2 %conv15 = sext i16 %tmp1 to i32 br label %land.rhs11 - + while.cond.preheader: ; preds = %entry %1 = icmp eq %struct.head_s* %list, null br i1 %1, label %return, label %land.rhs.preheader - + land.rhs.preheader: ; preds = %while.cond.preheader br label %land.rhs - + while.body: ; preds = %land.rhs %next4 = bitcast %struct.head_s* %list.addr.033 to %struct.head_s** %tmp4 = load %struct.head_s*, %struct.head_s** %next4, align 4 %tobool = icmp eq %struct.head_s* %tmp4, null br i1 %tobool, label %return, label %land.rhs - + land.rhs: ; preds = %land.rhs.preheader, %while.body %list.addr.033 = phi %struct.head_s* [ %tmp4, %while.body ], [ %list, %land.rhs.preheader ] %info2 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.033, i32 0, i32 1 @@ -47,13 +43,13 @@ %tmp3 = load i16, i16* %idx3, align 2 %cmp7 = icmp eq i16 %tmp3, %tmp br i1 %cmp7, label %return, label %while.body - + while.body19: ; preds = %land.rhs11 %next205 = bitcast %struct.head_s* %list.addr.136 to %struct.head_s** %tmp8 = load %struct.head_s*, %struct.head_s** %next205, align 4 %tobool10 = icmp eq %struct.head_s* %tmp8, null br i1 %tobool10, label %return, label %land.rhs11 - + land.rhs11: ; preds = %while.body19, %land.rhs11.lr.ph %list.addr.136 = phi %struct.head_s* [ %list, %land.rhs11.lr.ph ], [ %tmp8, %while.body19 ] %info12 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.136, i32 0, i32 1 @@ -64,18 +60,16 @@ %and = zext i16 %2 to i32 %cmp16 = icmp eq i32 %and, %conv15 br i1 %cmp16, label %return, label %while.body19 - + return: ; preds = %land.rhs11, %while.body19, %land.rhs, %while.body, %while.cond.preheader, %while.cond9.preheader - %retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ %list.addr.033, %land.rhs ], [ null, %while.body ], [ %list.addr.136, %land.rhs11 ], [ null, %while.body19 ] + %retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ null, %while.body ], [ %list.addr.033, %land.rhs ], [ null, %while.body19 ], [ %list.addr.136, %land.rhs11 ] ret %struct.head_s* %retval.0 } - - attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+lob,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" } - + ... --- name: search -alignment: 1 +alignment: 2 exposesReturnsTwice: false legalized: false regBankSelected: false @@ -112,73 +106,159 @@ constants: [] machineFunctionInfo: {} body: | + ; CHECK-LABEL: name: search + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x50000000), %bb.6(0x30000000) + ; CHECK: liveins: $r0, $r1 + ; CHECK: renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx) + ; CHECK: t2CMPri renamable $r2, -1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: tBcc %bb.6, 13 /* CC::le */, killed $cpsr + ; CHECK: bb.1.while.cond.preheader: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r2 + ; CHECK: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 0, 4, implicit-def $itstate + ; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate + ; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK: tB %bb.2, 14 /* CC::al */, $noreg + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: liveins: $r0, $r2 + ; CHECK: renamable $r1 = tUXTH killed renamable $r2, 14 /* CC::al */, $noreg + ; CHECK: bb.3.land.rhs: + ; CHECK: successors: %bb.5(0x04000000), %bb.4(0x7c000000) + ; CHECK: liveins: $r0, $r1 + ; CHECK: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info2) + ; CHECK: renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx3) + ; CHECK: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: tBcc %bb.5, 0 /* CC::eq */, killed $cpsr + ; CHECK: bb.4.while.body: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: liveins: $r0, $r1 + ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next4) + ; CHECK: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 0, 4, implicit-def $itstate + ; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate + ; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK: tB %bb.3, 14 /* CC::al */, $noreg + ; CHECK: bb.5.return: + ; CHECK: liveins: $r0 + ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 + ; CHECK: bb.6.while.cond9.preheader: + ; CHECK: successors: %bb.7(0x80000000) + ; CHECK: liveins: $r0, $r1 + ; CHECK: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 0, 4, implicit-def $itstate + ; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate + ; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK: tB %bb.7, 14 /* CC::al */, $noreg + ; CHECK: bb.7.land.rhs11.lr.ph: + ; CHECK: successors: %bb.8(0x80000000) + ; CHECK: liveins: $r0, $r1 + ; CHECK: renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16143) + ; CHECK: bb.8.land.rhs11: + ; CHECK: successors: %bb.9(0x80000000) + ; CHECK: liveins: $r0, $r1 + ; CHECK: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info12) + ; CHECK: renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.data166, align 2) + ; CHECK: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 0, 8, implicit-def $itstate + ; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK: tB %bb.9, 14 /* CC::al */, $noreg + ; CHECK: bb.9.while.body19: + ; CHECK: successors: %bb.8(0x80000000) + ; CHECK: liveins: $r0, $r1 + ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next205) + ; CHECK: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2IT 0, 4, implicit-def $itstate + ; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate + ; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK: tB %bb.8, 14 /* CC::al */, $noreg bb.0.entry: - successors: %bb.3(0x50000000), %bb.1(0x30000000) + successors: %bb.2(0x50000000), %bb.1(0x30000000) liveins: $r0, $r1 - - renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14, $noreg :: (load 2 from %ir.idx) - t2CMPri renamable $r2, -1, 14, $noreg, implicit-def $cpsr - t2Bcc %bb.1, 13, killed $cpsr - - bb.3.while.cond.preheader: - successors: %bb.4(0x80000000) + + renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx) + t2CMPri renamable $r2, -1, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2Bcc %bb.1, 13 /* CC::le */, killed $cpsr + + bb.2.while.cond.preheader: + successors: %bb.3(0x50000000) liveins: $r0, $r2 - - tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr + + tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 4, implicit-def $itstate - renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate - tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate - renamable $r1 = tUXTH killed renamable $r2, 14, $noreg - + renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate + tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + t2B %bb.3, 14 /* CC::al */, $noreg + + bb.3: + successors: %bb.4(0x80000000) + liveins: $r0, $r2 + + renamable $r1 = tUXTH killed renamable $r2, 14 /* CC::al */, $noreg + bb.4.land.rhs: - successors: %bb.6(0x04000000), %bb.5(0x7c000000) + successors: %bb.9(0x04000000), %bb.5(0x7c000000) liveins: $r0, $r1 - - renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info2) - renamable $r2 = tLDRHi killed renamable $r2, 1, 14, $noreg :: (load 2 from %ir.idx3) - tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr - t2Bcc %bb.6, 0, killed $cpsr - + + renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info2) + renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx3) + tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2Bcc %bb.9, 0 /* CC::eq */, killed $cpsr + bb.5.while.body: successors: %bb.4(0x7c000000) liveins: $r0, $r1 - - renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next4) - tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr + + renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next4) + tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 4, implicit-def $itstate - renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate - tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate - t2B %bb.4, 14, $noreg - - bb.6.return: + renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate + tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + t2B %bb.4, 14 /* CC::al */, $noreg + + bb.9.return: liveins: $r0 - - tBX_RET 14, $noreg, implicit $r0 - + + tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 + bb.1.while.cond9.preheader: - successors: %bb.2(0x80000000) + successors: %bb.7(0x50000000) liveins: $r0, $r1 - - tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr + + tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 4, implicit-def $itstate - renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate - tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate - renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14, $noreg :: (load 2 from %ir.data16143) - - bb.2.land.rhs11: - successors: %bb.2(0x7c000000) + renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate + tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + t2B %bb.7, 14 /* CC::al */, $noreg + + bb.7.land.rhs11.lr.ph: + successors: %bb.8(0x80000000) + liveins: $r0, $r1 + + renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16143) + + bb.8.land.rhs11: + successors: %bb.6(0x80000000) liveins: $r0, $r1 - - renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info12) - renamable $r2 = tLDRBi killed renamable $r2, 0, 14, $noreg :: (load 1 from %ir.data166, align 2) - tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr + + renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info12) + renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.data166, align 2) + tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate - tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate - renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next205) - tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr + tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + t2B %bb.6, 14 /* CC::al */, $noreg + + bb.6.while.body19: + successors: %bb.8(0x7c000000) + liveins: $r0, $r1 + + renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next205) + tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 4, implicit-def $itstate - renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate - tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate - t2B %bb.2, 14, $noreg + renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate + tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + t2B %bb.8, 14 /* CC::al */, $noreg ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir @@ -1,49 +1,39 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s --check-prefix=CHECK-LOB # RUN: llc -mtriple=thumbv8.1m.main -mattr=-lob %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s --check-prefix=CHECK-NOLOB -# CHECK-NOLOB-NOT: t2LE - -# CHECK-LOB: bb.3.land.rhs: -# CHECK-LOB: tCBZ $r0, %bb.8 -# CHECK-LOB: t2LE %bb.3 -# CHECK-LOB: bb.6.land.rhs11: -# CHECK-LOB: bb.7.while.body19: -# CHECK-LOB: tCBZ $r0, %bb.8 -# CHECK-LOB: t2LE %bb.6 -# CHECK-LOB: bb.8: - --- | target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-unknown-unknown" - + %struct.head_s = type { %struct.head_s*, %struct.data_s* } %struct.data_s = type { i16, i16 } - + ; Function Attrs: norecurse nounwind readonly - define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr #0 { + define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr { entry: %idx = getelementptr inbounds %struct.data_s, %struct.data_s* %info, i32 0, i32 1 %0 = load i16, i16* %idx, align 2 %cmp = icmp sgt i16 %0, -1 br i1 %cmp, label %while.cond.preheader, label %while.cond9.preheader - + while.cond9.preheader: ; preds = %entry %1 = icmp eq %struct.head_s* %list, null br i1 %1, label %return, label %land.rhs11.lr.ph - + land.rhs11.lr.ph: ; preds = %while.cond9.preheader %data16143 = bitcast %struct.data_s* %info to i16* %2 = load i16, i16* %data16143, align 2 %conv15 = sext i16 %2 to i32 br label %land.rhs11 - + while.cond.preheader: ; preds = %entry %3 = icmp eq %struct.head_s* %list, null br i1 %3, label %return, label %land.rhs.preheader - + land.rhs.preheader: ; preds = %while.cond.preheader br label %land.rhs - + land.rhs: ; preds = %land.rhs.preheader, %while.body %list.addr.033 = phi %struct.head_s* [ %6, %while.body ], [ %list, %land.rhs.preheader ] %info2 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.033, i32 0, i32 1 @@ -52,13 +42,13 @@ %5 = load i16, i16* %idx3, align 2 %cmp7 = icmp eq i16 %5, %0 br i1 %cmp7, label %return, label %while.body - + while.body: ; preds = %land.rhs %next4 = bitcast %struct.head_s* %list.addr.033 to %struct.head_s** %6 = load %struct.head_s*, %struct.head_s** %next4, align 4 %tobool = icmp eq %struct.head_s* %6, null br i1 %tobool, label %return, label %land.rhs - + land.rhs11: ; preds = %while.body19, %land.rhs11.lr.ph %list.addr.136 = phi %struct.head_s* [ %list, %land.rhs11.lr.ph ], [ %10, %while.body19 ] %info12 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.136, i32 0, i32 1 @@ -69,20 +59,18 @@ %and = zext i16 %9 to i32 %cmp16 = icmp eq i32 %and, %conv15 br i1 %cmp16, label %return, label %while.body19 - + while.body19: ; preds = %land.rhs11 %next206 = bitcast %struct.head_s* %list.addr.136 to %struct.head_s** %10 = load %struct.head_s*, %struct.head_s** %next206, align 4 %tobool10 = icmp eq %struct.head_s* %10, null br i1 %tobool10, label %return, label %land.rhs11 - + return: ; preds = %while.body19, %land.rhs11, %while.body, %land.rhs, %while.cond.preheader, %while.cond9.preheader %retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ %list.addr.033, %land.rhs ], [ null, %while.body ], [ %list.addr.136, %land.rhs11 ], [ null, %while.body19 ] ret %struct.head_s* %retval.0 } - - attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+lob,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" } - + ... --- name: search @@ -123,79 +111,197 @@ constants: [] machineFunctionInfo: {} body: | + ; CHECK-LOB-LABEL: name: search + ; CHECK-LOB: bb.0.entry: + ; CHECK-LOB: successors: %bb.1(0x50000000), %bb.5(0x30000000) + ; CHECK-LOB: liveins: $r0, $r1 + ; CHECK-LOB: renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx) + ; CHECK-LOB: t2CMPri renamable $r2, -1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK-LOB: tBcc %bb.5, 13 /* CC::le */, killed $cpsr + ; CHECK-LOB: bb.1.while.cond.preheader: + ; CHECK-LOB: successors: %bb.9(0x30000000), %bb.2(0x50000000) + ; CHECK-LOB: liveins: $r0, $r2 + ; CHECK-LOB: tCBZ $r0, %bb.9 + ; CHECK-LOB: bb.2.land.rhs.preheader: + ; CHECK-LOB: successors: %bb.3(0x80000000) + ; CHECK-LOB: liveins: $r0, $r2 + ; CHECK-LOB: renamable $r1 = tUXTH killed renamable $r2, 14 /* CC::al */, $noreg + ; CHECK-LOB: bb.3.land.rhs: + ; CHECK-LOB: successors: %bb.4(0x80000000) + ; CHECK-LOB: liveins: $r0, $r1 + ; CHECK-LOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info2) + ; CHECK-LOB: renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx3) + ; CHECK-LOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK-LOB: t2IT 0, 8, implicit-def $itstate + ; CHECK-LOB: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK-LOB: bb.4.while.body: + ; CHECK-LOB: successors: %bb.9(0x04000000), %bb.3(0x7c000000) + ; CHECK-LOB: liveins: $r0, $r1 + ; CHECK-LOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next4) + ; CHECK-LOB: tCBZ $r0, %bb.9 + ; CHECK-LOB: t2LE %bb.3 + ; CHECK-LOB: bb.5.while.cond9.preheader: + ; CHECK-LOB: successors: %bb.9(0x30000000), %bb.6(0x50000000) + ; CHECK-LOB: liveins: $r0, $r1 + ; CHECK-LOB: tCBZ $r0, %bb.9 + ; CHECK-LOB: bb.6.land.rhs11.lr.ph: + ; CHECK-LOB: successors: %bb.7(0x80000000) + ; CHECK-LOB: liveins: $r0, $r1 + ; CHECK-LOB: renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16143) + ; CHECK-LOB: bb.7.land.rhs11: + ; CHECK-LOB: successors: %bb.10(0x04000000), %bb.8(0x7c000000) + ; CHECK-LOB: liveins: $r0, $r1 + ; CHECK-LOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info12) + ; CHECK-LOB: renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.data165, align 2) + ; CHECK-LOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK-LOB: tBcc %bb.10, 0 /* CC::eq */, killed $cpsr + ; CHECK-LOB: bb.8.while.body19: + ; CHECK-LOB: successors: %bb.9(0x04000000), %bb.7(0x7c000000) + ; CHECK-LOB: liveins: $r0, $r1 + ; CHECK-LOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next206) + ; CHECK-LOB: tCBZ $r0, %bb.9 + ; CHECK-LOB: t2LE %bb.7 + ; CHECK-LOB: bb.9: + ; CHECK-LOB: successors: %bb.10(0x80000000) + ; CHECK-LOB: renamable $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg + ; CHECK-LOB: bb.10.return: + ; CHECK-LOB: liveins: $r0 + ; CHECK-LOB: tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 + ; CHECK-NOLOB-LABEL: name: search + ; CHECK-NOLOB: bb.0.entry: + ; CHECK-NOLOB: successors: %bb.1(0x50000000), %bb.5(0x30000000) + ; CHECK-NOLOB: liveins: $r0, $r1 + ; CHECK-NOLOB: renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx) + ; CHECK-NOLOB: t2CMPri renamable $r2, -1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK-NOLOB: tBcc %bb.5, 13 /* CC::le */, killed $cpsr + ; CHECK-NOLOB: bb.1.while.cond.preheader: + ; CHECK-NOLOB: successors: %bb.9(0x30000000), %bb.2(0x50000000) + ; CHECK-NOLOB: liveins: $r0, $r2 + ; CHECK-NOLOB: tCBZ $r0, %bb.9 + ; CHECK-NOLOB: bb.2.land.rhs.preheader: + ; CHECK-NOLOB: successors: %bb.3(0x80000000) + ; CHECK-NOLOB: liveins: $r0, $r2 + ; CHECK-NOLOB: renamable $r1 = tUXTH killed renamable $r2, 14 /* CC::al */, $noreg + ; CHECK-NOLOB: bb.3.land.rhs: + ; CHECK-NOLOB: successors: %bb.4(0x80000000) + ; CHECK-NOLOB: liveins: $r0, $r1 + ; CHECK-NOLOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info2) + ; CHECK-NOLOB: renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx3) + ; CHECK-NOLOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK-NOLOB: t2IT 0, 8, implicit-def $itstate + ; CHECK-NOLOB: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK-NOLOB: bb.4.while.body: + ; CHECK-NOLOB: successors: %bb.9(0x04000000), %bb.3(0x7c000000) + ; CHECK-NOLOB: liveins: $r0, $r1 + ; CHECK-NOLOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next4) + ; CHECK-NOLOB: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK-NOLOB: tBcc %bb.3, 1 /* CC::ne */, killed $cpsr + ; CHECK-NOLOB: tB %bb.9, 14 /* CC::al */, $noreg + ; CHECK-NOLOB: bb.5.while.cond9.preheader: + ; CHECK-NOLOB: successors: %bb.9(0x30000000), %bb.6(0x50000000) + ; CHECK-NOLOB: liveins: $r0, $r1 + ; CHECK-NOLOB: tCBZ $r0, %bb.9 + ; CHECK-NOLOB: bb.6.land.rhs11.lr.ph: + ; CHECK-NOLOB: successors: %bb.7(0x80000000) + ; CHECK-NOLOB: liveins: $r0, $r1 + ; CHECK-NOLOB: renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16143) + ; CHECK-NOLOB: bb.7.land.rhs11: + ; CHECK-NOLOB: successors: %bb.10(0x04000000), %bb.8(0x7c000000) + ; CHECK-NOLOB: liveins: $r0, $r1 + ; CHECK-NOLOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info12) + ; CHECK-NOLOB: renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.data165, align 2) + ; CHECK-NOLOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK-NOLOB: tBcc %bb.10, 0 /* CC::eq */, killed $cpsr + ; CHECK-NOLOB: bb.8.while.body19: + ; CHECK-NOLOB: successors: %bb.9(0x04000000), %bb.7(0x7c000000) + ; CHECK-NOLOB: liveins: $r0, $r1 + ; CHECK-NOLOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next206) + ; CHECK-NOLOB: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK-NOLOB: tBcc %bb.7, 1 /* CC::ne */, killed $cpsr + ; CHECK-NOLOB: bb.9: + ; CHECK-NOLOB: successors: %bb.10(0x80000000) + ; CHECK-NOLOB: renamable $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg + ; CHECK-NOLOB: bb.10.return: + ; CHECK-NOLOB: liveins: $r0 + ; CHECK-NOLOB: tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 bb.0.entry: successors: %bb.5(0x50000000), %bb.1(0x30000000) liveins: $r0, $r1 - + renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14, $noreg :: (load 2 from %ir.idx) t2CMPri renamable $r2, -1, 14, $noreg, implicit-def $cpsr t2Bcc %bb.1, 13, killed $cpsr - + bb.5.while.cond.preheader: successors: %bb.8(0x30000000), %bb.6(0x50000000) liveins: $r0, $r2 - + tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr t2Bcc %bb.8, 0, killed $cpsr - + bb.6.land.rhs.preheader: successors: %bb.7(0x80000000) liveins: $r0, $r2 - + renamable $r1 = tUXTH killed renamable $r2, 14, $noreg - + bb.7.land.rhs: - successors: %bb.8(0x04000000), %bb.7(0x7c000000) + successors: %bb.10(0x80000000) liveins: $r0, $r1 - + renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info2) renamable $r2 = tLDRHi killed renamable $r2, 1, 14, $noreg :: (load 2 from %ir.idx3) tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + + bb.10.while.body: + successors: %bb.8(0x04000000), %bb.7(0x7c000000) + liveins: $r0, $r1 + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next4) tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr t2Bcc %bb.7, 1, killed $cpsr t2B %bb.8, 14, $noreg - + bb.1.while.cond9.preheader: successors: %bb.8(0x30000000), %bb.2(0x50000000) liveins: $r0, $r1 - + tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr t2Bcc %bb.8, 0, killed $cpsr - + bb.2.land.rhs11.lr.ph: successors: %bb.3(0x80000000) liveins: $r0, $r1 - + renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14, $noreg :: (load 2 from %ir.data16143) - + bb.3.land.rhs11: successors: %bb.9(0x04000000), %bb.4(0x7c000000) liveins: $r0, $r1 - + renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info12) renamable $r2 = tLDRBi killed renamable $r2, 0, 14, $noreg :: (load 1 from %ir.data165, align 2) tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr t2Bcc %bb.9, 0, killed $cpsr - + bb.4.while.body19: successors: %bb.8(0x04000000), %bb.3(0x7c000000) liveins: $r0, $r1 - + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next206) tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr t2Bcc %bb.3, 1, killed $cpsr - + bb.8: successors: %bb.9(0x80000000) - + renamable $r0, dead $cpsr = tMOVi8 0, 14, $noreg - + bb.9.return: liveins: $r0 - + tBX_RET 14, $noreg, implicit killed $r0 ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir @@ -108,6 +108,9 @@ ; CHECK: t2IT 0, 4, implicit-def $itstate ; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate ; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r7 ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -120,8 +123,8 @@ ; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: dead $lr = t2DLS renamable $r12 ; CHECK: $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $q0, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr @@ -132,8 +135,8 @@ ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.middle.block: + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.middle.block: ; CHECK: liveins: $q0 ; CHECK: renamable $r0 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit killed $r0 @@ -145,6 +148,11 @@ t2IT 0, 4, implicit-def $itstate renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $lr, $r7 + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -158,8 +166,8 @@ t2DoLoopStart renamable $r12 $r3 = tMOVr killed $r12, 14, $noreg - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $q0, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg @@ -172,10 +180,10 @@ renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.middle.block: + bb.3.middle.block: liveins: $q0 renamable $r0 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir @@ -108,12 +108,15 @@ ; CHECK-LABEL: name: non_masked_load ; CHECK: bb.0.entry: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: liveins: $lr, $r0, $r1, $r2 + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r7 ; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 2, implicit-def $itstate ; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate ; CHECK: renamable $r0 = tUXTB killed renamable $r0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate ; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2 ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $lr, implicit-def $sp, implicit $sp ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -129,8 +132,8 @@ ; CHECK: renamable $r3 = t2LSRri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3 = t2SUBrs renamable $r2, killed renamable $r3, 34, 14 /* CC::al */, $noreg, $noreg ; CHECK: $lr = t2DLS killed renamable $lr - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP8 renamable $r2, 0, $noreg ; CHECK: $q1 = MVE_VORR killed $q0, killed $q0, 0, $noreg, undef $q1 @@ -140,8 +143,8 @@ ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 16, 14 /* CC::al */, $noreg ; CHECK: renamable $q2 = MVE_VADDi8 killed renamable $q2, renamable $q1, 0, $noreg, undef renamable $q2 ; CHECK: renamable $q0 = MVE_VADDi8 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.middle.block: + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.middle.block: ; CHECK: liveins: $q0, $q1, $r3 ; CHECK: renamable $vpr = MVE_VCTP8 killed renamable $r3, 0, $noreg ; CHECK: renamable $q0 = MVE_VPSEL killed renamable $q0, killed renamable $q1, 0, killed renamable $vpr @@ -158,6 +161,11 @@ renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate renamable $r0 = tUXTB killed renamable $r0, 0, $cpsr, implicit killed $r0, implicit $itstate tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $lr + frame-setup tPUSH 14, $noreg, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -174,8 +182,8 @@ renamable $r3 = t2SUBrs renamable $r2, killed renamable $r3, 34, 14, $noreg, $noreg t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $q0, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP8 renamable $r2, 0, $noreg @@ -187,10 +195,10 @@ renamable $q2 = MVE_VADDi8 killed renamable $q2, renamable $q1, 0, $noreg, undef renamable $q2 renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $q0 = MVE_VADDi8 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.middle.block: + bb.3.middle.block: liveins: $q0, $q1, $r3 renamable $vpr = MVE_VCTP8 killed renamable $r3, 0, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-store.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-store.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-store.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-store.mir @@ -110,14 +110,17 @@ ; CHECK: tCMPi8 renamable $r3, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 8, implicit-def $itstate ; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $r12 = t2ADDri renamable $r3, 15, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $lr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 15, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 16, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 35, 14 /* CC::al */, $noreg, $noreg ; CHECK: $lr = t2DLS killed renamable $lr - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP8 renamable $r3, 0, $noreg ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 16, 14 /* CC::al */, $noreg @@ -126,8 +129,8 @@ ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRBU8_post killed renamable $r2, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv1618, align 1) ; CHECK: renamable $q0 = MVE_VADDi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r0 = MVE_VSTRBU8_post killed renamable $q0, killed renamable $r0, 16, 1, $noreg :: (store 16 into %ir.lsr.iv1921, align 1) - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -142,6 +145,11 @@ tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tPOP_RET 0, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $lr + renamable $r12 = t2ADDri renamable $r3, 15, 14, $noreg, $noreg renamable $lr = t2MOVi 1, 14, $noreg, $noreg renamable $r12 = t2BICri killed renamable $r12, 15, 14, $noreg, $noreg @@ -149,8 +157,8 @@ renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 35, 14, $noreg, $noreg t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP8 renamable $r3, 0, $noreg @@ -161,10 +169,10 @@ renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $q0 = MVE_VADDi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 renamable $r0 = MVE_VSTRBU8_post killed renamable $q0, killed renamable $r0, 16, 1, $noreg :: (store 16 into %ir.lsr.iv1921, align 1) - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: tPOP_RET 14, $noreg, def $r7, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll @@ -9,6 +9,7 @@ ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: uxtbeq r0, r0 ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: add.w r3, r2, #15 ; CHECK-NEXT: vmov.i32 q1, #0x0 @@ -17,7 +18,7 @@ ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #4 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB0_1: @ %vector.body +; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.8 r2 ; CHECK-NEXT: vmov q0, q1 @@ -28,8 +29,8 @@ ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrbt.u8 q2, [r1], #16 ; CHECK-NEXT: vadd.i8 q1, q1, q2 -; CHECK-NEXT: le lr, .LBB0_1 -; CHECK-NEXT: @ %bb.2: @ %middle.block +; CHECK-NEXT: le lr, .LBB0_2 +; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u8 r0, q0 ; CHECK-NEXT: pop.w {r7, lr} @@ -79,6 +80,7 @@ ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: sxtheq r0, r0 ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB1_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: adds r3, r2, #7 ; CHECK-NEXT: vmov.i32 q1, #0x0 @@ -87,7 +89,7 @@ ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #3 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB1_1: @ %vector.body +; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.16 r2 ; CHECK-NEXT: vmov q0, q1 @@ -98,8 +100,8 @@ ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrbt.u16 q2, [r1], #8 ; CHECK-NEXT: vadd.i16 q1, q1, q2 -; CHECK-NEXT: le lr, .LBB1_1 -; CHECK-NEXT: @ %bb.2: @ %middle.block +; CHECK-NEXT: le lr, .LBB1_2 +; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u16 r0, q0 ; CHECK-NEXT: pop.w {r7, lr} @@ -151,17 +153,18 @@ ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: uxtbeq r0, r0 ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB2_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: dlstp.8 lr, r2 -; CHECK-NEXT: .LBB2_1: @ %vector.body +; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u8 q1, [r1], #16 ; CHECK-NEXT: vldrb.u8 q2, [r0], #16 ; CHECK-NEXT: vsub.i8 q1, q2, q1 ; CHECK-NEXT: vadd.i8 q0, q1, q0 -; CHECK-NEXT: letp lr, .LBB2_1 -; CHECK-NEXT: @ %bb.2: @ %middle.block +; CHECK-NEXT: letp lr, .LBB2_2 +; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vaddv.u8 r0, q0 ; CHECK-NEXT: pop.w {r7, lr} ; CHECK-NEXT: uxtb r0, r0 @@ -210,17 +213,18 @@ ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: sxtheq r0, r0 ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB3_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: dlstp.16 lr, r2 -; CHECK-NEXT: .LBB3_1: @ %vector.body +; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u16 q1, [r0], #8 ; CHECK-NEXT: vldrb.u16 q2, [r1], #8 ; CHECK-NEXT: vsub.i16 q1, q2, q1 ; CHECK-NEXT: vadd.i16 q0, q1, q0 -; CHECK-NEXT: letp lr, .LBB3_1 -; CHECK-NEXT: @ %bb.2: @ %middle.block +; CHECK-NEXT: letp lr, .LBB3_2 +; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vaddv.u16 r0, q0 ; CHECK-NEXT: pop.w {r7, lr} ; CHECK-NEXT: sxth r0, r0 @@ -271,17 +275,18 @@ ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: uxtbeq r0, r0 ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB4_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: dlstp.8 lr, r2 -; CHECK-NEXT: .LBB4_1: @ %vector.body +; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u8 q1, [r0], #16 ; CHECK-NEXT: vldrb.u8 q2, [r1], #16 ; CHECK-NEXT: vmul.i8 q1, q2, q1 ; CHECK-NEXT: vadd.i8 q0, q1, q0 -; CHECK-NEXT: letp lr, .LBB4_1 -; CHECK-NEXT: @ %bb.2: @ %middle.block +; CHECK-NEXT: letp lr, .LBB4_2 +; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vaddv.u8 r0, q0 ; CHECK-NEXT: pop.w {r7, lr} ; CHECK-NEXT: uxtb r0, r0 @@ -330,17 +335,18 @@ ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: sxtheq r0, r0 ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB5_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: dlstp.16 lr, r2 -; CHECK-NEXT: .LBB5_1: @ %vector.body +; CHECK-NEXT: .LBB5_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u16 q1, [r0], #8 ; CHECK-NEXT: vldrb.u16 q2, [r1], #8 ; CHECK-NEXT: vmul.i16 q1, q2, q1 ; CHECK-NEXT: vadd.i16 q0, q1, q0 -; CHECK-NEXT: letp lr, .LBB5_1 -; CHECK-NEXT: @ %bb.2: @ %middle.block +; CHECK-NEXT: letp lr, .LBB5_2 +; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vaddv.u16 r0, q0 ; CHECK-NEXT: pop.w {r7, lr} ; CHECK-NEXT: sxth r0, r0 @@ -636,6 +642,7 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: .LBB8_5: @ %vector.ph ; CHECK-NEXT: movw r1, :lower16:days ; CHECK-NEXT: movt r1, :upper16:days ; CHECK-NEXT: movs r2, #52 @@ -645,12 +652,12 @@ ; CHECK-NEXT: vmov.32 q1[0], r0 ; CHECK-NEXT: subs r0, r3, #1 ; CHECK-NEXT: dlstp.32 lr, r0 -; CHECK-NEXT: .LBB8_5: @ %vector.body +; CHECK-NEXT: .LBB8_6: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vadd.i32 q1, q0, q1 -; CHECK-NEXT: letp lr, .LBB8_5 -; CHECK-NEXT: @ %bb.6: @ %middle.block +; CHECK-NEXT: letp lr, .LBB8_6 +; CHECK-NEXT: @ %bb.7: @ %middle.block ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: pop {r4, pc} diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/safe-retaining.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/safe-retaining.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/safe-retaining.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/safe-retaining.mir @@ -125,11 +125,14 @@ ; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + ; CHECK: bb.1.loop.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) ; CHECK: dead $lr = MVE_DLSTP_32 killed renamable $r3 ; CHECK: $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg - ; CHECK: bb.1.loop.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.loop.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $r0, $r1, $r2, $r12 ; CHECK: $lr = tMOVr $r12, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg @@ -137,8 +140,8 @@ ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.addr.a, align 4) ; CHECK: renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg ; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 0, killed $noreg :: (store 16 into %ir.addr.c, align 4) - ; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.1 - ; CHECK: bb.2.exit: + ; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -151,12 +154,17 @@ tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + + bb.1.loop.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r4, $lr + renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) t2DoLoopStart renamable $r4 $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg - bb.1.loop.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.loop.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $r0, $r1, $r2, $r3, $r12 renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg @@ -170,10 +178,10 @@ renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg MVE_VPST 8, implicit $vpr renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4) - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.exit: + bb.3.exit: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc ... @@ -217,11 +225,14 @@ ; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + ; CHECK: bb.1.loop.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $r12 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) ; CHECK: dead $lr = MVE_DLSTP_16 killed renamable $r3 ; CHECK: $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg - ; CHECK: bb.1.loop.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.loop.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $r0, $r1, $r2, $r4 ; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg ; CHECK: renamable $r4, dead $cpsr = tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg @@ -230,8 +241,8 @@ ; CHECK: $r0 = tMOVr $r1, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VQSHRUNs16th killed renamable $q1, killed renamable $q0, 1, 0, $noreg ; CHECK: renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 0, killed $noreg :: (store 16 into %ir.addr.c, align 2) - ; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.1 - ; CHECK: bb.2.exit: + ; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -244,12 +255,17 @@ tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + + bb.1.loop.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r4, $lr + renamable $r12 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) t2DoLoopStart renamable $r12 $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg - bb.1.loop.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.loop.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $r0, $r1, $r2, $r3, $r4 renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg @@ -264,10 +280,10 @@ renamable $q1 = MVE_VQSHRUNs16th killed renamable $q1, killed renamable $q0, 1, 0, $noreg MVE_VPST 8, implicit $vpr renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 2) - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.exit: + bb.3.exit: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir @@ -117,20 +117,23 @@ ; CHECK: tCMPi8 $r3, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 8, implicit-def $itstate ; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.for.body.preheader: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14 /* CC::al */, $noreg ; CHECK: $lr = t2DLS killed $r3 - ; CHECK: bb.1.for.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.for.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2 ; CHECK: dead renamable $r3 = SPACE 4070, undef renamable $r0 ; CHECK: renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep3) ; CHECK: renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep7) ; CHECK: renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14 /* CC::al */, $noreg ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep11) - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -143,14 +146,19 @@ tCMPi8 $r3, 0, 14, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tPOP_RET 0, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.for.body.preheader: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r7, $lr + renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg $lr = tMOVr $r3, 14, $noreg t2DoLoopStart killed $r3 - bb.1.for.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.for.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2 dead renamable $r3 = SPACE 4070, undef renamable $r0 @@ -159,10 +167,10 @@ renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14, $noreg early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep11) renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: tPOP_RET 14, $noreg, def $r7, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll @@ -9,15 +9,16 @@ ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: dlstp.16 lr, r3 -; CHECK-NEXT: .LBB0_1: @ %vector.body +; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q0, [r1], #16 ; CHECK-NEXT: vldrh.u16 q1, [r0], #16 ; CHECK-NEXT: vqadd.u16 q0, q1, q0 ; CHECK-NEXT: vstrh.16 q0, [r2], #16 -; CHECK-NEXT: letp lr, .LBB0_1 -; CHECK-NEXT: @ %bb.2: @ %while.end +; CHECK-NEXT: letp lr, .LBB0_2 +; CHECK-NEXT: @ %bb.3: @ %while.end ; CHECK-NEXT: pop {r7, pc} entry: %cmp7 = icmp eq i32 %blockSize, 0 @@ -58,15 +59,16 @@ ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB1_1: @ %vector.ph ; CHECK-NEXT: dlstp.16 lr, r3 -; CHECK-NEXT: .LBB1_1: @ %vector.body +; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q0, [r1], #16 ; CHECK-NEXT: vldrh.u16 q1, [r0], #16 ; CHECK-NEXT: vqadd.s16 q0, q1, q0 ; CHECK-NEXT: vstrh.16 q0, [r2], #16 -; CHECK-NEXT: letp lr, .LBB1_1 -; CHECK-NEXT: @ %bb.2: @ %while.end +; CHECK-NEXT: letp lr, .LBB1_2 +; CHECK-NEXT: @ %bb.3: @ %while.end ; CHECK-NEXT: pop {r7, pc} entry: %cmp7 = icmp eq i32 %blockSize, 0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll @@ -9,14 +9,15 @@ ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: dlstp.32 lr, r2 -; CHECK-NEXT: .LBB0_1: @ %vector.body +; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vabs.f32 q0, q0 ; CHECK-NEXT: vstrw.32 q0, [r1], #16 -; CHECK-NEXT: letp lr, .LBB0_1 -; CHECK-NEXT: @ %bb.2: @ %while.end +; CHECK-NEXT: letp lr, .LBB0_2 +; CHECK-NEXT: @ %bb.3: @ %while.end ; CHECK-NEXT: pop {r7, pc} entry: %cmp3 = icmp eq i32 %blockSize, 0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll @@ -9,14 +9,15 @@ ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: dlstp.32 lr, r2 -; CHECK-NEXT: .LBB0_1: @ %vector.body +; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vrinta.f32 q0, q0 ; CHECK-NEXT: vstrw.32 q0, [r1], #16 -; CHECK-NEXT: letp lr, .LBB0_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB0_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %cmp5 = icmp eq i32 %n, 0 @@ -54,14 +55,15 @@ ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB1_1: @ %vector.ph ; CHECK-NEXT: dlstp.32 lr, r2 -; CHECK-NEXT: .LBB1_1: @ %vector.body +; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vrintx.f32 q0, q0 ; CHECK-NEXT: vstrw.32 q0, [r1], #16 -; CHECK-NEXT: letp lr, .LBB1_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB1_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %cmp5 = icmp eq i32 %n, 0 @@ -99,14 +101,15 @@ ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB2_1: @ %vector.ph ; CHECK-NEXT: dlstp.32 lr, r2 -; CHECK-NEXT: .LBB2_1: @ %vector.body +; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vrintz.f32 q0, q0 ; CHECK-NEXT: vstrw.32 q0, [r1], #16 -; CHECK-NEXT: letp lr, .LBB2_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB2_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %cmp5 = icmp eq i32 %n, 0 @@ -144,14 +147,15 @@ ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB3_1: @ %vector.ph ; CHECK-NEXT: dlstp.32 lr, r2 -; CHECK-NEXT: .LBB3_1: @ %vector.body +; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vrintp.f32 q0, q0 ; CHECK-NEXT: vstrw.32 q0, [r1], #16 -; CHECK-NEXT: letp lr, .LBB3_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB3_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %cmp5 = icmp eq i32 %n, 0 @@ -189,14 +193,15 @@ ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB4_1: @ %vector.ph ; CHECK-NEXT: dlstp.32 lr, r2 -; CHECK-NEXT: .LBB4_1: @ %vector.body +; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vrintm.f32 q0, q0 ; CHECK-NEXT: vstrw.32 q0, [r1], #16 -; CHECK-NEXT: letp lr, .LBB4_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB4_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %cmp5 = icmp eq i32 %n, 0 @@ -235,6 +240,7 @@ ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB5_1: @ %vector.ph ; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: vdup.32 q1, r2 ; CHECK-NEXT: bic r3, r3, #3 @@ -245,7 +251,7 @@ ; CHECK-NEXT: vldrw.u32 q0, [r3] ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB5_1: @ %vector.body +; CHECK-NEXT: .LBB5_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vadd.i32 q2, q0, r12 ; CHECK-NEXT: vdup.32 q3, r12 @@ -261,11 +267,11 @@ ; CHECK-NEXT: vrintr.f32 s12, s8 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q3, [r1], #16 -; CHECK-NEXT: le lr, .LBB5_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: le lr, .LBB5_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: @ %bb.4: ; CHECK-NEXT: .LCPI5_0: ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 1 @ 0x1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll @@ -9,16 +9,17 @@ ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: subs r3, #1 ; CHECK-NEXT: dlstp.16 lr, r3 -; CHECK-NEXT: .LBB0_1: @ %vector.body +; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q0, [r1], #16 ; CHECK-NEXT: vldrh.u16 q1, [r0], #16 ; CHECK-NEXT: vqsub.u16 q0, q1, q0 ; CHECK-NEXT: vstrh.16 q0, [r2], #16 -; CHECK-NEXT: letp lr, .LBB0_1 -; CHECK-NEXT: @ %bb.2: @ %while.end +; CHECK-NEXT: letp lr, .LBB0_2 +; CHECK-NEXT: @ %bb.3: @ %while.end ; CHECK-NEXT: pop {r7, pc} entry: %cmp7 = icmp eq i32 %blockSize, 0 @@ -59,16 +60,17 @@ ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB1_1: @ %vector.ph ; CHECK-NEXT: subs r3, #1 ; CHECK-NEXT: dlstp.16 lr, r3 -; CHECK-NEXT: .LBB1_1: @ %vector.body +; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q0, [r1], #16 ; CHECK-NEXT: vldrh.u16 q1, [r0], #16 ; CHECK-NEXT: vqsub.s16 q0, q1, q0 ; CHECK-NEXT: vstrh.16 q0, [r2], #16 -; CHECK-NEXT: letp lr, .LBB1_1 -; CHECK-NEXT: @ %bb.2: @ %while.end +; CHECK-NEXT: letp lr, .LBB1_2 +; CHECK-NEXT: @ %bb.3: @ %while.end ; CHECK-NEXT: pop {r7, pc} entry: %cmp7 = icmp eq i32 %blockSize, 0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s ; The following functions should all fail to become tail-predicated. @@ -453,7 +454,7 @@ ret void } -; adding 5, instead of 4, to index. +; adding 5, instead of 4, to index. define void @wrong_index_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* noalias nocapture %c, i32 %N) { entry: %cmp8 = icmp eq i32 %N, 0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir @@ -82,6 +82,9 @@ ; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 8, implicit-def $itstate ; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r5, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg @@ -91,8 +94,8 @@ ; CHECK: $r12 = t2MOVTi16 killed $r12, 65535, 14 /* CC::al */, $noreg ; CHECK: dead $lr = t2DLS renamable $r3 ; CHECK: $r5 = tMOVr killed $r3, 14 /* CC::al */, $noreg - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $r0, $r1, $r2, $r5, $r12 ; CHECK: $r3 = tMOVr $r12, 14 /* CC::al */, $noreg ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg @@ -103,8 +106,8 @@ ; CHECK: early-clobber renamable $r1 = t2STRH_POST killed renamable $r3, killed renamable $r1, 2, 14 /* CC::al */, $noreg :: (store 2 into %ir.lsr.iv.2) ; CHECK: renamable $r5, dead $cpsr = nsw tSUBi8 killed $r5, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg - ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.exit: + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r5, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -117,6 +120,11 @@ tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r5, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r5, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg @@ -127,8 +135,8 @@ t2DoLoopStart renamable $r3 $r5 = tMOVr killed $r3, 14 /* CC::al */, $noreg - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $r0, $r1, $r2, $r5, $r12 $r3 = tMOVr $r12, 14 /* CC::al */, $noreg @@ -141,10 +149,10 @@ renamable $r5, dead $cpsr = nsw tSUBi8 killed $r5, 1, 14 /* CC::al */, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.exit: + bb.3.exit: tPOP_RET 14 /* CC::al */, $noreg, def $r5, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-retaining.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-retaining.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-retaining.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-retaining.mir @@ -123,11 +123,14 @@ ; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + ; CHECK: bb.1.loop.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) ; CHECK: dead $lr = t2DLS renamable $r4 ; CHECK: $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg - ; CHECK: bb.1.loop.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.loop.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $r0, $r1, $r2, $r3, $r12 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg ; CHECK: $lr = tMOVr $r12, 14 /* CC::al */, $noreg @@ -140,8 +143,8 @@ ; CHECK: renamable $q0 = MVE_VQSHRNbhs32 killed renamable $q0, killed renamable $q1, 15, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr ; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4) - ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.exit: + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.exit: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -154,12 +157,17 @@ tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + + bb.1.loop.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r4, $lr + renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) t2DoLoopStart renamable $r4 $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg - bb.1.loop.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.loop.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $r0, $r1, $r2, $r3, $r12 renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg @@ -174,10 +182,10 @@ renamable $q0 = MVE_VQSHRNbhs32 killed renamable $q0, killed renamable $q1, 15, 0, $noreg MVE_VPST 8, implicit $vpr renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4) - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.exit: + bb.3.exit: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc ... @@ -221,11 +229,14 @@ ; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + ; CHECK: bb.1.loop.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) ; CHECK: dead $lr = t2DLS renamable $r4 ; CHECK: $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg - ; CHECK: bb.1.loop.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.loop.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $r0, $r1, $r2, $r3, $r12 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr @@ -238,8 +249,8 @@ ; CHECK: renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr ; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4) - ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.exit: + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.exit: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -252,12 +263,17 @@ tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + + bb.1.loop.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r4, $lr + renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) t2DoLoopStart renamable $r4 $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg - bb.1.loop.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.loop.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $r0, $r1, $r2, $r3, $r12 renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg @@ -272,10 +288,10 @@ renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg MVE_VPST 8, implicit $vpr renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4) - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.exit: + bb.3.exit: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir @@ -853,15 +853,18 @@ ; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 8, implicit-def $itstate ; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2 - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1 ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, killed $noreg :: (load 8 from %ir.lsr.iv17, align 2) ; CHECK: renamable $r12 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 - ; CHECK: bb.2.exit: + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -874,6 +877,11 @@ tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg @@ -882,8 +890,8 @@ t2DoLoopStart renamable $r12 $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg @@ -895,10 +903,10 @@ renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.exit: + bb.3.exit: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ... @@ -944,15 +952,18 @@ ; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 8, implicit-def $itstate ; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: $lr = MVE_DLSTP_16 killed renamable $r2 - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1 ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHU16_post killed renamable $r0, 16, 0, killed $noreg :: (load 16 from %ir.lsr.iv17, align 2) ; CHECK: renamable $r12 = MVE_VADDVs16no_acc killed renamable $q0, 0, $noreg ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 - ; CHECK: bb.2.exit: + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -965,6 +976,11 @@ tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg @@ -973,8 +989,8 @@ t2DoLoopStart renamable $r12 $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg @@ -986,10 +1002,10 @@ renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.exit: + bb.3.exit: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ... @@ -1035,15 +1051,18 @@ ; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 8, implicit-def $itstate ; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: $lr = MVE_DLSTP_8 killed renamable $r2 - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1 ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRBU8_post killed renamable $r0, 16, 0, killed $noreg :: (load 16 from %ir.lsr.iv17, align 1) ; CHECK: renamable $r12 = MVE_VADDVs8no_acc killed renamable $q0, 0, $noreg ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 - ; CHECK: bb.2.exit: + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -1056,6 +1075,11 @@ tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r2, 7, 14 /* CC::al */, $noreg renamable $r3 = t2BICri killed renamable $r3, 7, 14 /* CC::al */, $noreg, $noreg renamable $r12 = t2SUBri killed renamable $r3, 7, 14 /* CC::al */, $noreg, $noreg @@ -1064,8 +1088,8 @@ t2DoLoopStart renamable $r12 $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP8 renamable $r2, 0, $noreg @@ -1077,10 +1101,10 @@ renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 16, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.exit: + bb.3.exit: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ... @@ -1234,6 +1258,9 @@ ; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 8, implicit-def $itstate ; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg @@ -1241,8 +1268,8 @@ ; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: dead $lr = t2DLS renamable $r12 ; CHECK: $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr @@ -1253,8 +1280,8 @@ ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg - ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.exit: + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -1267,6 +1294,11 @@ tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg @@ -1275,8 +1307,8 @@ t2DoLoopStart renamable $r12 $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg @@ -1289,10 +1321,10 @@ early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.exit: + bb.3.exit: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ... @@ -1459,6 +1491,9 @@ ; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 8, implicit-def $itstate ; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg @@ -1466,8 +1501,8 @@ ; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: dead $lr = t2DLS renamable $r12 ; CHECK: $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr @@ -1478,8 +1513,8 @@ ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg - ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.exit: + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -1492,6 +1527,11 @@ tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg @@ -1500,8 +1540,8 @@ t2DoLoopStart renamable $r12 $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg @@ -1514,10 +1554,10 @@ early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.exit: + bb.3.exit: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ... @@ -1687,6 +1727,9 @@ ; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 8, implicit-def $itstate ; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg @@ -1696,8 +1739,8 @@ ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) ; CHECK: dead $lr = t2DLS renamable $r12 ; CHECK: $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $q0, $r0, $r1, $r2, $r4 ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr @@ -1709,9 +1752,10 @@ ; CHECK: renamable $r3 = t2SXTH killed renamable $r12, 0, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) - ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.exit: + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc + bb.0.entry: successors: %bb.1(0x80000000) liveins: $r0, $r1, $r2, $r4, $lr @@ -1723,6 +1767,11 @@ tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r4, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg @@ -1733,8 +1782,8 @@ t2DoLoopStart renamable $r12 $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $q0, $r0, $r1, $r2, $r4 renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg @@ -1748,10 +1797,10 @@ renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.exit: + bb.3.exit: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc ... @@ -1932,6 +1981,9 @@ ; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 8, implicit-def $itstate ; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg @@ -1941,8 +1993,8 @@ ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) ; CHECK: dead $lr = t2DLS renamable $r12 ; CHECK: $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $q0, $r0, $r1, $r2, $r4 ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr @@ -1954,8 +2006,8 @@ ; CHECK: renamable $r3 = t2UXTH killed renamable $r12, 0, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) - ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.exit: + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -1968,6 +2020,11 @@ tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r4, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg @@ -1978,8 +2035,8 @@ t2DoLoopStart renamable $r12 $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $q0, $r0, $r1, $r2, $r4 renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg @@ -1993,10 +2050,10 @@ renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.exit: + bb.3.exit: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc ... @@ -2177,6 +2234,9 @@ ; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 8, implicit-def $itstate ; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 7, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 7, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 7, 14 /* CC::al */, $noreg, $noreg @@ -2186,8 +2246,8 @@ ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) ; CHECK: dead $lr = t2DLS renamable $r12 ; CHECK: $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $q0, $r0, $r1, $r2, $r4 ; CHECK: renamable $vpr = MVE_VCTP8 renamable $r2, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr @@ -2199,8 +2259,8 @@ ; CHECK: renamable $r3 = t2SXTB killed renamable $r12, 0, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 16, 14 /* CC::al */, $noreg ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) - ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.exit: + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -2213,6 +2273,11 @@ tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r4, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r2, 7, 14 /* CC::al */, $noreg renamable $r3 = t2BICri killed renamable $r3, 7, 14 /* CC::al */, $noreg, $noreg renamable $r12 = t2SUBri killed renamable $r3, 7, 14 /* CC::al */, $noreg, $noreg @@ -2223,8 +2288,8 @@ t2DoLoopStart renamable $r12 $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $q0, $r0, $r1, $r2, $r4 renamable $vpr = MVE_VCTP8 renamable $r2, 0, $noreg @@ -2238,10 +2303,10 @@ renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 16, 14 /* CC::al */, $noreg early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.exit: + bb.3.exit: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc ... @@ -2422,6 +2487,9 @@ ; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 8, implicit-def $itstate ; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 7, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 7, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 7, 14 /* CC::al */, $noreg, $noreg @@ -2431,8 +2499,8 @@ ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) ; CHECK: dead $lr = t2DLS renamable $r12 ; CHECK: $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $q0, $r0, $r1, $r2, $r4 ; CHECK: renamable $vpr = MVE_VCTP8 renamable $r2, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr @@ -2444,8 +2512,8 @@ ; CHECK: renamable $r3 = t2UXTB killed renamable $r12, 0, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 16, 14 /* CC::al */, $noreg ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) - ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.exit: + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -2458,6 +2526,11 @@ tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r4, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r4, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r2, 7, 14 /* CC::al */, $noreg renamable $r3 = t2BICri killed renamable $r3, 7, 14 /* CC::al */, $noreg, $noreg renamable $r12 = t2SUBri killed renamable $r3, 7, 14 /* CC::al */, $noreg, $noreg @@ -2468,8 +2541,8 @@ t2DoLoopStart renamable $r12 $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $q0, $r0, $r1, $r2, $r4 renamable $vpr = MVE_VCTP8 renamable $r2, 0, $noreg @@ -2483,10 +2556,10 @@ renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 16, 14 /* CC::al */, $noreg early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.exit: + bb.3.exit: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir @@ -115,6 +115,9 @@ ; CHECK: t2IT 0, 4, implicit-def $itstate ; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate ; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r7 ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -127,8 +130,8 @@ ; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: dead $lr = t2DLS renamable $r12 ; CHECK: $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $q1, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg ; CHECK: $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, undef $q0 @@ -140,8 +143,8 @@ ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 - ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.middle.block: + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.middle.block: ; CHECK: liveins: $q0, $q1, $r2 ; CHECK: renamable $r0, dead $cpsr = tADDi3 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $vpr = MVE_VCTP32 killed renamable $r0, 0, $noreg @@ -156,6 +159,11 @@ t2IT 0, 4, implicit-def $itstate renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $lr, $r7 + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -169,8 +177,8 @@ t2DoLoopStart renamable $r12 $r3 = tMOVr killed $r12, 14, $noreg - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $q1, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg @@ -184,10 +192,10 @@ renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.middle.block: + bb.3.middle.block: liveins: $q0, $q1, $r2 renamable $r0, dead $cpsr = tADDi3 killed renamable $r2, 4, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subi3.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subi3.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subi3.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subi3.mir @@ -108,16 +108,19 @@ ; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3 - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2 ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv13, align 4) ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1416, align 4) ; CHECK: renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1719, align 4) - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -130,6 +133,11 @@ tCMPi8 renamable $r3, 1, 14, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate tPOP_RET 11, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r7, $lr + renamable $r12 = t2ADDri renamable $r3, 3, 14, $noreg, $noreg renamable $lr = t2MOVi 1, 14, $noreg, $noreg renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg @@ -137,8 +145,8 @@ renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg @@ -150,10 +158,10 @@ MVE_VPST 8, implicit $vpr renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: tPOP_RET 14, $noreg, def $r7, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri.mir @@ -107,16 +107,19 @@ ; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3 - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2 ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv13, align 4) ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1416, align 4) ; CHECK: renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1719, align 4) - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -129,6 +132,11 @@ tCMPi8 renamable $r3, 1, 14, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate tPOP_RET 11, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r7, $lr + renamable $r12 = t2ADDri renamable $r3, 3, 14, $noreg, $noreg renamable $lr = t2MOVi 1, 14, $noreg, $noreg renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg @@ -136,8 +144,8 @@ renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg @@ -149,10 +157,10 @@ MVE_VPST 8, implicit $vpr renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: tPOP_RET 14, $noreg, def $r7, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri12.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri12.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri12.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri12.mir @@ -107,16 +107,19 @@ ; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3 - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2 ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv13, align 4) ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1416, align 4) ; CHECK: renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1719, align 4) - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -129,6 +132,11 @@ tCMPi8 renamable $r3, 1, 14, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate tPOP_RET 11, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r3, $r7, $lr + renamable $r12 = t2ADDri renamable $r3, 3, 14, $noreg, $noreg renamable $lr = t2MOVi 1, 14, $noreg, $noreg renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg @@ -136,8 +144,8 @@ renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg @@ -149,10 +157,10 @@ MVE_VPST 8, implicit $vpr renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: tPOP_RET 14, $noreg, def $r7, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir @@ -113,12 +113,15 @@ ; CHECK-LABEL: name: wrong_liveout_shift ; CHECK: bb.0.entry: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: liveins: $lr, $r0, $r1, $r2 + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r7 ; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 2, implicit-def $itstate ; CHECK: renamable $r0 = t2MOVi16 32767, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate ; CHECK: renamable $r0 = tSXTH killed renamable $r0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate ; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2 ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $lr, implicit-def $sp, implicit $sp ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -135,8 +138,8 @@ ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from constant-pool) ; CHECK: renamable $r3 = t2SUBrs renamable $r2, killed renamable $r12, 26, 14 /* CC::al */, $noreg, $noreg ; CHECK: $lr = t2DLS killed renamable $lr - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg ; CHECK: $q1 = MVE_VORR killed $q0, killed $q0, 0, $noreg, undef $q1 @@ -146,8 +149,8 @@ ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = nuw MVE_VMULi16 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $q0 = MVE_VSUBi16 renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.middle.block: + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.middle.block: ; CHECK: liveins: $q0, $q1, $r3 ; CHECK: renamable $vpr = MVE_VCTP16 killed renamable $r3, 0, $noreg ; CHECK: renamable $q0 = MVE_VPSEL killed renamable $q0, killed renamable $q1, 0, killed renamable $vpr @@ -155,7 +158,7 @@ ; CHECK: $sp = t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr ; CHECK: renamable $r0 = tSXTH killed renamable $r0, 14 /* CC::al */, $noreg ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 - ; CHECK: bb.3 (align 16): + ; CHECK: bb.4 (align 16): ; CHECK: CONSTPOOL_ENTRY 0, %const.0, 16 bb.0.entry: successors: %bb.1(0x80000000) @@ -166,6 +169,11 @@ renamable $r0 = t2MOVi16 32767, 0, $cpsr, implicit killed $r0, implicit $itstate renamable $r0 = tSXTH killed renamable $r0, 0, $cpsr, implicit killed $r0, implicit $itstate tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $lr + frame-setup tPUSH 14, $noreg, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -183,8 +191,8 @@ renamable $r3 = t2SUBrs renamable $r2, killed renamable $r12, 26, 14, $noreg, $noreg t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $q0, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg @@ -196,10 +204,10 @@ renamable $q0 = nuw MVE_VMULi16 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $q0 = MVE_VSUBi16 renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.middle.block: + bb.3.middle.block: liveins: $q0, $q1, $r3 renamable $vpr = MVE_VCTP16 killed renamable $r3, 0, $noreg @@ -209,7 +217,7 @@ renamable $r0 = tSXTH killed renamable $r0, 14, $noreg tBX_RET 14, $noreg, implicit killed $r0 - bb.3 (align 16): + bb.4 (align 16): CONSTPOOL_ENTRY 0, %const.0, 16 ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll @@ -8,19 +8,20 @@ ; CHECK-NEXT: itt eq ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: vmov.i32 q1, #0x0 ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dlstp.32 lr, r2 -; CHECK-NEXT: .LBB0_1: @ %vector.body +; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 ; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: vmul.i32 q0, q2, q0 ; CHECK-NEXT: vadd.i32 q1, q0, q1 -; CHECK-NEXT: letp lr, .LBB0_1 -; CHECK-NEXT: @ %bb.2: @ %middle.block +; CHECK-NEXT: letp lr, .LBB0_2 +; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: pop {r7, pc} @@ -75,17 +76,18 @@ ; CHECK-NEXT: itt eq ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB1_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: vmov.i32 q1, #0x0 ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: dlstp.32 lr, r2 -; CHECK-NEXT: .LBB1_1: @ %vector.body +; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: adds r1, #4 ; CHECK-NEXT: vadd.i32 q1, q0, q1 -; CHECK-NEXT: letp lr, .LBB1_1 -; CHECK-NEXT: @ %bb.2: @ %middle.block +; CHECK-NEXT: letp lr, .LBB1_2 +; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: pop {r7, pc} @@ -136,17 +138,18 @@ ; CHECK-NEXT: itt eq ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB2_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: vmov.i32 q1, #0x0 ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: dlstp.32 lr, r2 -; CHECK-NEXT: .LBB2_1: @ %vector.body +; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: adds r1, #4 ; CHECK-NEXT: vadd.i32 q1, q0, q1 -; CHECK-NEXT: letp lr, .LBB2_1 -; CHECK-NEXT: @ %bb.2: @ %middle.block +; CHECK-NEXT: letp lr, .LBB2_2 +; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: pop {r7, pc} @@ -197,16 +200,17 @@ ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB3_1: @ %vector.ph ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 -; CHECK-NEXT: .LBB3_1: @ %vector.body +; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vmul.i32 q0, q0, r2 ; CHECK-NEXT: vstrw.32 q0, [r0], #16 -; CHECK-NEXT: letp lr, .LBB3_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB3_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %cmp6 = icmp eq i32 %N, 0 @@ -253,16 +257,17 @@ ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB4_1: @ %vector.ph ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 -; CHECK-NEXT: .LBB4_1: @ %vector.body +; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vadd.i32 q0, q0, r2 ; CHECK-NEXT: vstrw.32 q0, [r0], #16 -; CHECK-NEXT: letp lr, .LBB4_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB4_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %cmp6 = icmp eq i32 %N, 0 @@ -309,17 +314,18 @@ ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB5_1: @ %vector.ph ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.8 lr, r3 -; CHECK-NEXT: .LBB5_1: @ %vector.body +; CHECK-NEXT: .LBB5_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r12, r12, #16 ; CHECK-NEXT: vldrb.u8 q0, [r1], #16 ; CHECK-NEXT: vldrb.u8 q1, [r2], #16 ; CHECK-NEXT: vmul.i8 q0, q1, q0 ; CHECK-NEXT: vstrb.8 q0, [r0], #16 -; CHECK-NEXT: letp lr, .LBB5_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB5_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %cmp10 = icmp eq i32 %N, 0 @@ -368,17 +374,18 @@ ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB6_1: @ %vector.ph ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.16 lr, r3 -; CHECK-NEXT: .LBB6_1: @ %vector.body +; CHECK-NEXT: .LBB6_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r12, r12, #8 ; CHECK-NEXT: vldrh.u16 q0, [r1], #16 ; CHECK-NEXT: vldrh.u16 q1, [r2], #16 ; CHECK-NEXT: vmul.i16 q0, q1, q0 ; CHECK-NEXT: vstrh.16 q0, [r0], #16 -; CHECK-NEXT: letp lr, .LBB6_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB6_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %cmp10 = icmp eq i32 %N, 0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir @@ -51,6 +51,8 @@ ; Intentionally left blank - see MIR sequence below. entry: unreachable + vector.ph: + unreachable vector.body: unreachable for.cond.cleanup: @@ -61,6 +63,8 @@ ; Intentionally left blank - see MIR sequence below. entry: unreachable + vector.ph: + unreachable vector.body: unreachable for.cond.cleanup: @@ -113,6 +117,8 @@ ; Intentionally left blank - see MIR sequence below. entry: unreachable + vector.ph: + unreachable vector.body: unreachable for.cond.cleanup: @@ -123,6 +129,8 @@ ; Intentionally left blank - see MIR sequence below. entry: unreachable + vector.ph: + unreachable vector.body: unreachable for.cond.cleanup: @@ -133,6 +141,8 @@ ; Intentionally left blank - see MIR sequence below. entry: unreachable + vector.ph: + unreachable vector.body: unreachable for.cond.cleanup: @@ -202,18 +212,21 @@ ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r1 - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r2, $r3 ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 0, killed $noreg ; CHECK: MVE_VPTv4s32r 4, renamable $q1, renamable $r2, 11, implicit-def $vpr ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 12, 1, killed renamable $vpr ; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -226,6 +239,11 @@ tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg @@ -235,8 +253,8 @@ renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $q0, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg @@ -248,10 +266,10 @@ renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ... --- @@ -311,6 +329,9 @@ ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg @@ -319,8 +340,8 @@ ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg ; CHECK: $lr = t2DLS killed renamable $lr - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr @@ -331,8 +352,8 @@ ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 1, killed renamable $vpr ; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr ; CHECK: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ; ; Tests that secondary VCTPs are refused when their operand's reaching definition is not the same as the main @@ -349,6 +370,11 @@ tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg @@ -358,8 +384,8 @@ renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $q0, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg @@ -372,10 +398,10 @@ renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ... --- @@ -435,6 +461,9 @@ ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg @@ -443,8 +472,8 @@ ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg ; CHECK: $lr = t2DLS killed renamable $lr - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr @@ -454,8 +483,8 @@ ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr ; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr ; CHECK: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ; ; Tests that secondary VCTPs are refused when their operand is not the same register as the main VCTP's. @@ -471,6 +500,11 @@ tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg @@ -480,8 +514,8 @@ renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $q0, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg @@ -493,10 +527,10 @@ renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ... --- @@ -556,18 +590,21 @@ ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r1 - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r2, $r3 ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 0, killed $noreg ; CHECK: MVE_VPTv4s32r 12, renamable $q1, renamable $r2, 10, implicit-def $vpr ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 13, 1, killed renamable $vpr ; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 2, killed renamable $vpr - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ; ; Test including a else-predicated VCTP. @@ -583,6 +620,11 @@ tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg @@ -592,8 +634,8 @@ renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $q0, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg @@ -605,10 +647,10 @@ renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 2, killed renamable $vpr renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ... --- @@ -668,18 +710,21 @@ ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r1 - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r2, $r3 ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 0, killed $noreg ; CHECK: MVE_VPTv4s32r 4, renamable $q0, renamable $r2, 11, implicit-def $vpr ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 12, 1, killed renamable $vpr ; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -691,6 +736,11 @@ tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg @@ -700,8 +750,8 @@ renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $q0, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg @@ -713,10 +763,10 @@ renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ... --- @@ -776,16 +826,19 @@ ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r1, $r2 ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r1 - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r2, $r3 ; CHECK: MVE_VPTv4s32r 2, renamable $q0, renamable $r2, 8, implicit-def $vpr ; CHECK: dead renamable $vpr = MVE_VCMPs32r renamable $q0, renamable $r3, 12, 1, killed renamable $vpr - ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -797,6 +850,11 @@ tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg @@ -806,8 +864,8 @@ renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $q0, $r0, $r1, $r2, $r3 MVE_VPTv4s32r 2, renamable $q0, renamable $r2, 8, implicit-def $vpr @@ -815,10 +873,10 @@ renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ... --- @@ -878,6 +936,9 @@ ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 11, 8, implicit-def $itstate ; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg @@ -886,16 +947,16 @@ ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: dead renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg ; CHECK: $lr = t2DLS killed renamable $lr - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2 ; CHECK: MVE_VPTv4s32r 2, killed renamable $q0, renamable $r2, 2, implicit-def $vpr ; CHECK: renamable $q0 = MVE_VLDRWU32 renamable $r0, 0, 1, $vpr ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 1, killed $vpr ; CHECK: MVE_VSTRWU32 renamable $q0, renamable $r0, 0, 1, killed $vpr ; CHECK: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.for.cond.cleanup: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ; ; This shouldn't be tail-predicated because the VLDR isn't predicated on the VCTP. @@ -910,6 +971,11 @@ tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 11, 8, implicit-def $itstate frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg @@ -919,8 +985,8 @@ renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $q0, $r0, $r1, $r2, $r3 MVE_VPTv4s32r 2, renamable $q0, renamable $r2, 2, implicit-def $vpr @@ -929,9 +995,9 @@ MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, $vpr renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14 /* CC::al */, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg - bb.2.for.cond.cleanup: + bb.3.for.cond.cleanup: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir @@ -113,12 +113,15 @@ ; CHECK-LABEL: name: wrong_liveout_shift ; CHECK: bb.0.entry: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: liveins: $lr, $r0, $r1, $r2 + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r7 ; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 2, implicit-def $itstate ; CHECK: renamable $r0 = t2MOVi16 32767, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate ; CHECK: renamable $r0 = tSXTH killed renamable $r0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate ; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2 ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $lr, implicit-def $sp, implicit $sp ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -135,8 +138,8 @@ ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from constant-pool) ; CHECK: renamable $r3 = t2SUBrs renamable $r2, killed renamable $r12, 26, 14 /* CC::al */, $noreg, $noreg ; CHECK: $lr = t2DLS killed renamable $lr - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg ; CHECK: $q1 = MVE_VORR killed $q0, killed $q0, 0, $noreg, undef $q1 @@ -146,8 +149,8 @@ ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = nuw MVE_VMULi16 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $q0 = MVE_VSUBi16 renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.middle.block: + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.middle.block: ; CHECK: liveins: $q0, $q1, $r3 ; CHECK: renamable $vpr = MVE_VCTP16 killed renamable $r3, 0, $noreg ; CHECK: renamable $q0 = MVE_VPSEL killed renamable $q0, killed renamable $q1, 0, killed renamable $vpr @@ -155,7 +158,7 @@ ; CHECK: $sp = t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr ; CHECK: renamable $r0 = tSXTH killed renamable $r0, 14 /* CC::al */, $noreg ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 - ; CHECK: bb.3 (align 16): + ; CHECK: bb.4 (align 16): ; CHECK: CONSTPOOL_ENTRY 0, %const.0, 16 bb.0.entry: successors: %bb.1(0x80000000) @@ -166,6 +169,11 @@ renamable $r0 = t2MOVi16 32767, 0, $cpsr, implicit killed $r0, implicit $itstate renamable $r0 = tSXTH killed renamable $r0, 0, $cpsr, implicit killed $r0, implicit $itstate tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $lr + frame-setup tPUSH 14, $noreg, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -183,8 +191,8 @@ renamable $r3 = t2SUBrs renamable $r2, killed renamable $r12, 26, 14, $noreg, $noreg t2DoLoopStart renamable $lr - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $q0, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg @@ -196,10 +204,10 @@ renamable $q0 = nuw MVE_VMULi16 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $q0 = MVE_VSUBi16 renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.middle.block: + bb.3.middle.block: liveins: $q0, $q1, $r3 renamable $vpr = MVE_VCTP16 killed renamable $r3, 0, $noreg @@ -209,7 +217,7 @@ renamable $r0 = tSXTH killed renamable $r0, 14, $noreg tBX_RET 14, $noreg, implicit killed $r0 - bb.3 (align 16): + bb.4 (align 16): CONSTPOOL_ENTRY 0, %const.0, 16 ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir @@ -120,6 +120,9 @@ ; CHECK: t2IT 0, 4, implicit-def $itstate ; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate ; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r7 ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -133,8 +136,8 @@ ; CHECK: dead $lr = t2DLS renamable $r3 ; CHECK: $r12 = tMOVr killed $r3, 14 /* CC::al */, $noreg ; CHECK: $r3 = tMOVr $r2, 14 /* CC::al */, $noreg - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $q1, $r0, $r1, $r2, $r3, $r12 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg ; CHECK: $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, undef $q0 @@ -146,8 +149,8 @@ ; CHECK: renamable $r12 = nsw t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 - ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.middle.block: + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.middle.block: ; CHECK: liveins: $q0, $q1, $r2, $r3 ; CHECK: renamable $r0, dead $cpsr = tSUBi3 killed renamable $r2, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $q2 = MVE_VDUP32 killed renamable $r0, 0, $noreg, undef renamable $q2 @@ -164,6 +167,11 @@ t2IT 0, 4, implicit-def $itstate renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $lr, $r7 + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -178,8 +186,8 @@ $r12 = tMOVr killed $r3, 14, $noreg $r3 = tMOVr $r2, 14, $noreg - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $q1, $r0, $r1, $r2, $r3, $r12 renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg @@ -193,10 +201,10 @@ renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14, $noreg renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.middle.block: + bb.3.middle.block: liveins: $q0, $q1, $r2, $r3 renamable $r0, dead $cpsr = tSUBi3 killed renamable $r2, 1, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir @@ -112,6 +112,9 @@ ; CHECK: t2IT 0, 4, implicit-def $itstate ; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate ; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r7 ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -124,8 +127,8 @@ ; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: dead $lr = t2DLS renamable $r12 ; CHECK: $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg - ; CHECK: bb.1.vector.body: - ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: bb.2.vector.body: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $q1, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg ; CHECK: $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, undef $q0 @@ -137,8 +140,8 @@ ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 - ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1 - ; CHECK: bb.2.middle.block: + ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: bb.3.middle.block: ; CHECK: liveins: $q0, $q1, $r2 ; CHECK: renamable $vpr = MVE_VCTP32 killed renamable $r2, 0, $noreg ; CHECK: renamable $q0 = MVE_VPSEL killed renamable $q1, killed renamable $q0, 0, killed renamable $vpr @@ -152,6 +155,11 @@ t2IT 0, 4, implicit-def $itstate renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2, $lr, $r7 + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -165,8 +173,8 @@ t2DoLoopStart renamable $r12 $r3 = tMOVr killed $r12, 14, $noreg - bb.1.vector.body: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) + bb.2.vector.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $q1, $r0, $r1, $r2, $r3 renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg @@ -180,10 +188,10 @@ renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 renamable $lr = t2LoopDec killed renamable $lr, 1 - t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr - tB %bb.2, 14, $noreg + t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14, $noreg - bb.2.middle.block: + bb.3.middle.block: liveins: $q0, $q1, $r2 renamable $vpr = MVE_VCTP32 killed renamable $r2, 0, $noreg diff --git a/llvm/test/CodeGen/Thumb2/constant-hoisting.ll b/llvm/test/CodeGen/Thumb2/constant-hoisting.ll --- a/llvm/test/CodeGen/Thumb2/constant-hoisting.ll +++ b/llvm/test/CodeGen/Thumb2/constant-hoisting.ll @@ -122,28 +122,29 @@ ; CHECK-V7M: mov r2, r0 ; CHECK-V7M-NEXT: movs r0, #0 ; CHECK-V7M-NEXT: cmp r2, #29 -; CHECK-V7M-NEXT: bgt .LBB1_3 -; CHECK-V7M-NEXT: cbz r2, .LBB1_6 +; CHECK-V7M-NEXT: bgt .LBB1_4 +; CHECK-V7M-NEXT: cbz r2, .LBB1_7 ; CHECK-V7M-NEXT: cmp r2, #1 ; CHECK-V7M-NEXT: it ne ; CHECK-V7M-NEXT: bxne lr -; CHECK-V7M-NEXT: movw r0, #305 -; CHECK-V7M-NEXT: b .LBB1_8 ; CHECK-V7M-NEXT: .LBB1_3: +; CHECK-V7M-NEXT: movw r0, #305 +; CHECK-V7M-NEXT: b .LBB1_9 +; CHECK-V7M-NEXT: .LBB1_4: ; CHECK-V7M-NEXT: cmp r2, #30 -; CHECK-V7M-NEXT: beq .LBB1_7 +; CHECK-V7M-NEXT: beq .LBB1_8 ; CHECK-V7M-NEXT: cmp r2, #50 -; CHECK-V7M-NEXT: bne .LBB1_9 +; CHECK-V7M-NEXT: bne .LBB1_10 ; CHECK-V7M-NEXT: movw r0, #307 -; CHECK-V7M-NEXT: b .LBB1_8 -; CHECK-V7M-NEXT: .LBB1_6: -; CHECK-V7M-NEXT: mov.w r0, #304 -; CHECK-V7M-NEXT: b .LBB1_8 +; CHECK-V7M-NEXT: b .LBB1_9 ; CHECK-V7M-NEXT: .LBB1_7: -; CHECK-V7M-NEXT: mov.w r0, #306 +; CHECK-V7M-NEXT: mov.w r0, #304 +; CHECK-V7M-NEXT: b .LBB1_9 ; CHECK-V7M-NEXT: .LBB1_8: -; CHECK-V7M-NEXT: ldrb r0, [r1, r0] +; CHECK-V7M-NEXT: mov.w r0, #306 ; CHECK-V7M-NEXT: .LBB1_9: +; CHECK-V7M-NEXT: ldrb r0, [r1, r0] +; CHECK-V7M-NEXT: .LBB1_10: ; CHECK-V7M-NEXT: bx lr entry: switch i32 %a, label %return [ diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll --- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -7,15 +7,16 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: ldrh r1, [r1] -; CHECK-NEXT: .LBB0_1: @ %vector.body +; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: subs r3, #8 ; CHECK-NEXT: vadd.f16 q0, q0, r1 ; CHECK-NEXT: vstrb.8 q0, [r2], #16 -; CHECK-NEXT: bne .LBB0_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB0_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %B = load half, half* %BB @@ -53,15 +54,16 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB1_1: @ %vector.ph ; CHECK-NEXT: ldrh r1, [r1] -; CHECK-NEXT: .LBB1_1: @ %vector.body +; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: subs r3, #8 ; CHECK-NEXT: vadd.f16 q0, q0, r1 ; CHECK-NEXT: vstrb.8 q0, [r2], #16 -; CHECK-NEXT: bne .LBB1_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB1_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %B = load half, half* %BB @@ -99,15 +101,16 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB2_1: @ %vector.ph ; CHECK-NEXT: ldrh r1, [r1] -; CHECK-NEXT: .LBB2_1: @ %vector.body +; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: subs r3, #8 ; CHECK-NEXT: vmul.f16 q0, q0, r1 ; CHECK-NEXT: vstrb.8 q0, [r2], #16 -; CHECK-NEXT: bne .LBB2_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB2_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %B = load half, half* %BB @@ -145,15 +148,16 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB3_1: @ %vector.ph ; CHECK-NEXT: ldrh r1, [r1] -; CHECK-NEXT: .LBB3_1: @ %vector.body +; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: subs r3, #8 ; CHECK-NEXT: vmul.f16 q0, q0, r1 ; CHECK-NEXT: vstrb.8 q0, [r2], #16 -; CHECK-NEXT: bne .LBB3_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB3_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %B = load half, half* %BB @@ -191,15 +195,16 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB4_1: @ %vector.ph ; CHECK-NEXT: ldrh r1, [r1] -; CHECK-NEXT: .LBB4_1: @ %vector.body +; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: subs r3, #8 ; CHECK-NEXT: vsub.f16 q0, q0, r1 ; CHECK-NEXT: vstrb.8 q0, [r2], #16 -; CHECK-NEXT: bne .LBB4_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB4_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %B = load half, half* %BB @@ -237,16 +242,17 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB5_1: @ %vector.ph ; CHECK-NEXT: ldrh r1, [r1] ; CHECK-NEXT: vdup.16 q0, r1 -; CHECK-NEXT: .LBB5_1: @ %vector.body +; CHECK-NEXT: .LBB5_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: subs r3, #8 ; CHECK-NEXT: vsub.f16 q1, q0, q1 ; CHECK-NEXT: vstrb.8 q1, [r2], #16 -; CHECK-NEXT: bne .LBB5_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB5_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %B = load half, half* %BB @@ -286,16 +292,17 @@ ; CHECK-NEXT: cmp.w r12, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB6_1: @ %vector.ph ; CHECK-NEXT: ldrh r2, [r2] -; CHECK-NEXT: .LBB6_1: @ %vector.body +; CHECK-NEXT: .LBB6_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: subs.w r12, r12, #8 ; CHECK-NEXT: vfmas.f16 q1, q0, r2 ; CHECK-NEXT: vstrb.8 q1, [r3], #16 -; CHECK-NEXT: bne .LBB6_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB6_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %C = load half, half* %CC @@ -338,16 +345,17 @@ ; CHECK-NEXT: cmp.w r12, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB7_1: @ %vector.ph ; CHECK-NEXT: ldrh r2, [r2] -; CHECK-NEXT: .LBB7_1: @ %vector.body +; CHECK-NEXT: .LBB7_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: subs.w r12, r12, #8 ; CHECK-NEXT: vfmas.f16 q1, q0, r2 ; CHECK-NEXT: vstrb.8 q1, [r3], #16 -; CHECK-NEXT: bne .LBB7_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB7_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %C = load half, half* %CC @@ -390,16 +398,17 @@ ; CHECK-NEXT: cmp.w r12, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB8_1: @ %vector.ph ; CHECK-NEXT: ldrh r2, [r2] -; CHECK-NEXT: .LBB8_1: @ %vector.body +; CHECK-NEXT: .LBB8_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: subs.w r12, r12, #8 ; CHECK-NEXT: vfma.f16 q1, q0, r2 ; CHECK-NEXT: vstrb.8 q1, [r3], #16 -; CHECK-NEXT: bne .LBB8_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB8_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %C = load half, half* %CC @@ -442,16 +451,17 @@ ; CHECK-NEXT: cmp.w r12, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB9_1: @ %vector.ph ; CHECK-NEXT: ldrh r2, [r2] -; CHECK-NEXT: .LBB9_1: @ %vector.body +; CHECK-NEXT: .LBB9_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: subs.w r12, r12, #8 ; CHECK-NEXT: vfma.f16 q1, q0, r2 ; CHECK-NEXT: vstrb.8 q1, [r3], #16 -; CHECK-NEXT: bne .LBB9_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB9_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %C = load half, half* %CC @@ -495,10 +505,11 @@ ; CHECK-NEXT: cmp.w r12, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB10_1: @ %vector.ph ; CHECK-NEXT: ldrh r2, [r2] ; CHECK-NEXT: vdup.16 q0, r2 ; CHECK-NEXT: vneg.f16 q0, q0 -; CHECK-NEXT: .LBB10_1: @ %vector.body +; CHECK-NEXT: .LBB10_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 @@ -506,8 +517,8 @@ ; CHECK-NEXT: subs.w r12, r12, #8 ; CHECK-NEXT: vfma.f16 q3, q2, q1 ; CHECK-NEXT: vstrb.8 q3, [r3], #16 -; CHECK-NEXT: bne .LBB10_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB10_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %C = load half, half* %CC @@ -550,9 +561,10 @@ ; CHECK-NEXT: cmp.w r12, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB11_1: @ %vector.ph ; CHECK-NEXT: ldrh r2, [r2] ; CHECK-NEXT: vdup.16 q0, r2 -; CHECK-NEXT: .LBB11_1: @ %vector.body +; CHECK-NEXT: .LBB11_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 @@ -560,8 +572,8 @@ ; CHECK-NEXT: subs.w r12, r12, #8 ; CHECK-NEXT: vfms.f16 q3, q2, q1 ; CHECK-NEXT: vstrb.8 q3, [r3], #16 -; CHECK-NEXT: bne .LBB11_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB11_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %C = load half, half* %CC @@ -604,8 +616,9 @@ ; CHECK-NEXT: cmp.w r12, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB12_1: @ %vector.ph ; CHECK-NEXT: ldrh r2, [r2] -; CHECK-NEXT: .LBB12_1: @ %vector.body +; CHECK-NEXT: .LBB12_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 @@ -613,8 +626,8 @@ ; CHECK-NEXT: vneg.f16 q0, q0 ; CHECK-NEXT: vfma.f16 q0, q1, r2 ; CHECK-NEXT: vstrb.8 q0, [r3], #16 -; CHECK-NEXT: bne .LBB12_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB12_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %C = load half, half* %CC @@ -657,8 +670,9 @@ ; CHECK-NEXT: cmp.w r12, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB13_1: @ %vector.ph ; CHECK-NEXT: ldrh r2, [r2] -; CHECK-NEXT: .LBB13_1: @ %vector.body +; CHECK-NEXT: .LBB13_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 @@ -666,8 +680,8 @@ ; CHECK-NEXT: vneg.f16 q0, q0 ; CHECK-NEXT: vfma.f16 q0, q1, r2 ; CHECK-NEXT: vstrb.8 q0, [r3], #16 -; CHECK-NEXT: bne .LBB13_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB13_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %C = load half, half* %CC diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -7,15 +7,16 @@ ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: .LBB0_1: @ %vector.body +; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vadd.f32 q0, q0, r3 ; CHECK-NEXT: vstrb.8 q0, [r1], #16 -; CHECK-NEXT: bne .LBB0_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB0_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %0 = and i32 %n, 7 @@ -52,15 +53,16 @@ ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB1_1: @ %vector.ph ; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: .LBB1_1: @ %vector.body +; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vadd.f32 q0, q0, r3 ; CHECK-NEXT: vstrb.8 q0, [r1], #16 -; CHECK-NEXT: bne .LBB1_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB1_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %0 = and i32 %n, 7 @@ -97,15 +99,16 @@ ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB2_1: @ %vector.ph ; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: .LBB2_1: @ %vector.body +; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmul.f32 q0, q0, r3 ; CHECK-NEXT: vstrb.8 q0, [r1], #16 -; CHECK-NEXT: bne .LBB2_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB2_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %0 = and i32 %n, 7 @@ -142,15 +145,16 @@ ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB3_1: @ %vector.ph ; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: .LBB3_1: @ %vector.body +; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmul.f32 q0, q0, r3 ; CHECK-NEXT: vstrb.8 q0, [r1], #16 -; CHECK-NEXT: bne .LBB3_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB3_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %0 = and i32 %n, 7 @@ -187,15 +191,16 @@ ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB4_1: @ %vector.ph ; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: .LBB4_1: @ %vector.body +; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vsub.f32 q0, q0, r3 ; CHECK-NEXT: vstrb.8 q0, [r1], #16 -; CHECK-NEXT: bne .LBB4_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB4_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %0 = and i32 %n, 7 @@ -232,16 +237,17 @@ ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB5_1: @ %vector.ph ; CHECK-NEXT: vmov r3, s0 ; CHECK-NEXT: vdup.32 q0, r3 -; CHECK-NEXT: .LBB5_1: @ %vector.body +; CHECK-NEXT: .LBB5_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vsub.f32 q1, q0, q1 ; CHECK-NEXT: vstrb.8 q1, [r1], #16 -; CHECK-NEXT: bne .LBB5_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB5_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %0 = and i32 %n, 7 @@ -279,16 +285,17 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB6_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: .LBB6_1: @ %vector.body +; CHECK-NEXT: .LBB6_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vfmas.f32 q1, q0, r12 ; CHECK-NEXT: vstrb.8 q1, [r2], #16 -; CHECK-NEXT: bne .LBB6_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB6_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %0 = and i32 %n, 7 @@ -329,16 +336,17 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB7_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: .LBB7_1: @ %vector.body +; CHECK-NEXT: .LBB7_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vfmas.f32 q1, q0, r12 ; CHECK-NEXT: vstrb.8 q1, [r2], #16 -; CHECK-NEXT: bne .LBB7_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB7_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %0 = and i32 %n, 7 @@ -379,16 +387,17 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB8_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: .LBB8_1: @ %vector.body +; CHECK-NEXT: .LBB8_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vfma.f32 q1, q0, r12 ; CHECK-NEXT: vstrb.8 q1, [r2], #16 -; CHECK-NEXT: bne .LBB8_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB8_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %0 = and i32 %n, 7 @@ -429,16 +438,17 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB9_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: .LBB9_1: @ %vector.body +; CHECK-NEXT: .LBB9_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vfma.f32 q1, q0, r12 ; CHECK-NEXT: vstrb.8 q1, [r2], #16 -; CHECK-NEXT: bne .LBB9_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB9_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %0 = and i32 %n, 7 @@ -480,10 +490,11 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB10_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: vdup.32 q0, r12 ; CHECK-NEXT: vneg.f32 q0, q0 -; CHECK-NEXT: .LBB10_1: @ %vector.body +; CHECK-NEXT: .LBB10_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 @@ -491,8 +502,8 @@ ; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vfma.f32 q3, q2, q1 ; CHECK-NEXT: vstrb.8 q3, [r2], #16 -; CHECK-NEXT: bne .LBB10_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB10_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %0 = and i32 %n, 7 @@ -533,9 +544,10 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB11_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: vdup.32 q0, r12 -; CHECK-NEXT: .LBB11_1: @ %vector.body +; CHECK-NEXT: .LBB11_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 @@ -543,8 +555,8 @@ ; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vfms.f32 q3, q2, q1 ; CHECK-NEXT: vstrb.8 q3, [r2], #16 -; CHECK-NEXT: bne .LBB11_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB11_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %0 = and i32 %n, 7 @@ -585,8 +597,9 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB12_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: .LBB12_1: @ %vector.body +; CHECK-NEXT: .LBB12_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 @@ -594,8 +607,8 @@ ; CHECK-NEXT: vneg.f32 q0, q0 ; CHECK-NEXT: vfma.f32 q0, q1, r12 ; CHECK-NEXT: vstrb.8 q0, [r2], #16 -; CHECK-NEXT: bne .LBB12_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB12_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %0 = and i32 %n, 7 @@ -636,8 +649,9 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB13_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: .LBB13_1: @ %vector.body +; CHECK-NEXT: .LBB13_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 @@ -645,8 +659,8 @@ ; CHECK-NEXT: vneg.f32 q0, q0 ; CHECK-NEXT: vfma.f32 q0, q1, r12 ; CHECK-NEXT: vstrb.8 q0, [r2], #16 -; CHECK-NEXT: bne .LBB13_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB13_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: bx lr entry: %0 = and i32 %n, 7 diff --git a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll --- a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll @@ -9,18 +9,19 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r3 -; CHECK-NEXT: .LBB0_1: @ %vector.body +; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vfmas.f32 q1, q0, r12 ; CHECK-NEXT: vstrw.32 q1, [r2], #16 -; CHECK-NEXT: letp lr, .LBB0_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB0_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} entry: %cmp8 = icmp sgt i32 %n, 0 @@ -71,18 +72,19 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: .LBB1_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r3 -; CHECK-NEXT: .LBB1_1: @ %vector.body +; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vfmas.f32 q1, q0, r12 ; CHECK-NEXT: vstrw.32 q1, [r2], #16 -; CHECK-NEXT: letp lr, .LBB1_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB1_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} entry: %cmp8 = icmp sgt i32 %n, 0 @@ -134,18 +136,19 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: .LBB2_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r3 -; CHECK-NEXT: .LBB2_1: @ %vector.body +; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vfma.f32 q1, q0, r12 ; CHECK-NEXT: vstrw.32 q1, [r2], #16 -; CHECK-NEXT: letp lr, .LBB2_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB2_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} entry: %cmp8 = icmp sgt i32 %n, 0 @@ -196,18 +199,19 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: .LBB3_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r3 -; CHECK-NEXT: .LBB3_1: @ %vector.body +; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vfma.f32 q1, q0, r12 ; CHECK-NEXT: vstrw.32 q1, [r2], #16 -; CHECK-NEXT: letp lr, .LBB3_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB3_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} entry: %cmp8 = icmp sgt i32 %n, 0 @@ -259,19 +263,20 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: .LBB4_1: @ %vector.ph ; CHECK-NEXT: vmov r4, s0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: eor r12, r4, #-2147483648 ; CHECK-NEXT: movs r4, #0 -; CHECK-NEXT: .LBB4_1: @ %vector.body +; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vfmas.f32 q1, q0, r12 ; CHECK-NEXT: vstrw.32 q1, [r2], #16 -; CHECK-NEXT: letp lr, .LBB4_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB4_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} entry: %cmp8 = icmp sgt i32 %n, 0 @@ -323,12 +328,13 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: .LBB5_1: @ %vector.ph ; CHECK-NEXT: vmov r4, s0 ; CHECK-NEXT: vdup.32 q0, r4 ; CHECK-NEXT: vneg.f32 q0, q0 ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 -; CHECK-NEXT: .LBB5_1: @ %vector.body +; CHECK-NEXT: .LBB5_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vmov q3, q0 @@ -336,8 +342,8 @@ ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 ; CHECK-NEXT: vfma.f32 q3, q2, q1 ; CHECK-NEXT: vstrw.32 q3, [r2], #16 -; CHECK-NEXT: letp lr, .LBB5_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB5_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} entry: %cmp8 = icmp sgt i32 %n, 0 @@ -389,11 +395,12 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB6_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: vdup.32 q0, r12 ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 -; CHECK-NEXT: .LBB6_1: @ %vector.body +; CHECK-NEXT: .LBB6_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vmov q3, q0 @@ -401,8 +408,8 @@ ; CHECK-NEXT: vldrw.u32 q2, [r0], #16 ; CHECK-NEXT: vfms.f32 q3, q2, q1 ; CHECK-NEXT: vstrw.32 q3, [r2], #16 -; CHECK-NEXT: letp lr, .LBB6_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB6_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %cmp8 = icmp sgt i32 %n, 0 @@ -454,11 +461,12 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB7_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: vdup.32 q0, r12 ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 -; CHECK-NEXT: .LBB7_1: @ %vector.body +; CHECK-NEXT: .LBB7_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vmov q3, q0 @@ -466,8 +474,8 @@ ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 ; CHECK-NEXT: vfms.f32 q3, q2, q1 ; CHECK-NEXT: vstrw.32 q3, [r2], #16 -; CHECK-NEXT: letp lr, .LBB7_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB7_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %cmp8 = icmp sgt i32 %n, 0 @@ -519,19 +527,20 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: .LBB8_1: @ %vector.ph ; CHECK-NEXT: vmov r4, s0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: eor r12, r4, #-2147483648 ; CHECK-NEXT: movs r4, #0 -; CHECK-NEXT: .LBB8_1: @ %vector.body +; CHECK-NEXT: .LBB8_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vfma.f32 q1, q0, r12 ; CHECK-NEXT: vstrw.32 q1, [r2], #16 -; CHECK-NEXT: letp lr, .LBB8_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB8_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} entry: %cmp8 = icmp sgt i32 %n, 0 @@ -583,19 +592,20 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB9_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: vdup.32 q0, r12 ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 -; CHECK-NEXT: .LBB9_1: @ %vector.body +; CHECK-NEXT: .LBB9_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 ; CHECK-NEXT: vfms.f32 q2, q1, q0 ; CHECK-NEXT: vstrw.32 q2, [r2], #16 -; CHECK-NEXT: letp lr, .LBB9_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB9_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %cmp8 = icmp sgt i32 %n, 0 @@ -647,10 +657,11 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: .LBB10_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r3 -; CHECK-NEXT: .LBB10_1: @ %vector.body +; CHECK-NEXT: .LBB10_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 @@ -658,8 +669,8 @@ ; CHECK-NEXT: vneg.f32 q1, q1 ; CHECK-NEXT: vfma.f32 q1, q0, r12 ; CHECK-NEXT: vstrw.32 q1, [r2], #16 -; CHECK-NEXT: letp lr, .LBB10_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB10_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} entry: %cmp8 = icmp sgt i32 %n, 0 @@ -711,10 +722,11 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: .LBB11_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r3 -; CHECK-NEXT: .LBB11_1: @ %vector.body +; CHECK-NEXT: .LBB11_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 @@ -722,8 +734,8 @@ ; CHECK-NEXT: vneg.f32 q1, q1 ; CHECK-NEXT: vfma.f32 q1, q0, r12 ; CHECK-NEXT: vstrw.32 q1, [r2], #16 -; CHECK-NEXT: letp lr, .LBB11_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: letp lr, .LBB11_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} entry: %cmp8 = icmp sgt i32 %n, 0 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll --- a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll @@ -317,6 +317,7 @@ ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: .LBB8_1: @ %vector.ph.preheader ; CHECK-NEXT: bic r12, r2, #3 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: sub.w lr, r12, #4 @@ -324,26 +325,26 @@ ; CHECK-NEXT: adr r3, .LCPI8_0 ; CHECK-NEXT: vldrw.u32 q0, [r3] ; CHECK-NEXT: vadd.i32 q0, q0, r0 -; CHECK-NEXT: .LBB8_1: @ %vector.ph +; CHECK-NEXT: .LBB8_2: @ %vector.ph ; CHECK-NEXT: @ =>This Loop Header: Depth=1 -; CHECK-NEXT: @ Child Loop BB8_2 Depth 2 +; CHECK-NEXT: @ Child Loop BB8_3 Depth 2 ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: dls lr, r4 -; CHECK-NEXT: .LBB8_2: @ %vector.body -; CHECK-NEXT: @ Parent Loop BB8_1 Depth=1 +; CHECK-NEXT: .LBB8_3: @ %vector.body +; CHECK-NEXT: @ Parent Loop BB8_2 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: vldrw.u32 q2, [q1, #16]! ; CHECK-NEXT: vstrb.8 q2, [r0], #16 -; CHECK-NEXT: le lr, .LBB8_2 -; CHECK-NEXT: @ %bb.3: @ %middle.block -; CHECK-NEXT: @ in Loop: Header=BB8_1 Depth=1 +; CHECK-NEXT: le lr, .LBB8_3 +; CHECK-NEXT: @ %bb.4: @ %middle.block +; CHECK-NEXT: @ in Loop: Header=BB8_2 Depth=1 ; CHECK-NEXT: cmp r12, r2 -; CHECK-NEXT: bne .LBB8_1 -; CHECK-NEXT: @ %bb.4: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB8_2 +; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.5: +; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: .LCPI8_0: ; CHECK-NEXT: .long 4294967280 @ 0xfffffff0 ; CHECK-NEXT: .long 4294967284 @ 0xfffffff4 @@ -489,6 +490,7 @@ ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: .LBB10_1: @ %vector.ph.preheader ; CHECK-NEXT: bic r12, r2, #3 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: sub.w lr, r12, #4 @@ -496,26 +498,26 @@ ; CHECK-NEXT: adr r3, .LCPI10_0 ; CHECK-NEXT: vldrw.u32 q0, [r3] ; CHECK-NEXT: vadd.i32 q0, q0, r0 -; CHECK-NEXT: .LBB10_1: @ %vector.ph +; CHECK-NEXT: .LBB10_2: @ %vector.ph ; CHECK-NEXT: @ =>This Loop Header: Depth=1 -; CHECK-NEXT: @ Child Loop BB10_2 Depth 2 +; CHECK-NEXT: @ Child Loop BB10_3 Depth 2 ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: dls lr, r4 -; CHECK-NEXT: .LBB10_2: @ %vector.body -; CHECK-NEXT: @ Parent Loop BB10_1 Depth=1 +; CHECK-NEXT: .LBB10_3: @ %vector.body +; CHECK-NEXT: @ Parent Loop BB10_2 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: vldrw.u32 q2, [q1, #508]! ; CHECK-NEXT: vstrb.8 q2, [r0], #16 -; CHECK-NEXT: le lr, .LBB10_2 -; CHECK-NEXT: @ %bb.3: @ %middle.block -; CHECK-NEXT: @ in Loop: Header=BB10_1 Depth=1 +; CHECK-NEXT: le lr, .LBB10_3 +; CHECK-NEXT: @ %bb.4: @ %middle.block +; CHECK-NEXT: @ in Loop: Header=BB10_2 Depth=1 ; CHECK-NEXT: cmp r12, r2 -; CHECK-NEXT: bne .LBB10_1 -; CHECK-NEXT: @ %bb.4: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB10_2 +; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.5: +; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: .LCPI10_0: ; CHECK-NEXT: .long 4294966788 @ 0xfffffe04 ; CHECK-NEXT: .long 4294966792 @ 0xfffffe08 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll b/llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll --- a/llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll @@ -737,18 +737,19 @@ ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB22_1: @ %vector.body.preheader ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r2, lsr #2 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB22_1: @ %vector.body +; CHECK-NEXT: .LBB22_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vptt.i32 ne, q0, zr ; CHECK-NEXT: vldrwt.u32 q1, [q0] ; CHECK-NEXT: vstrwt.32 q1, [r0], #16 -; CHECK-NEXT: le lr, .LBB22_1 -; CHECK-NEXT: @ %bb.2: @ %for.end +; CHECK-NEXT: le lr, .LBB22_2 +; CHECK-NEXT: @ %bb.3: @ %for.end ; CHECK-NEXT: pop {r7, pc} entry: %and = and i32 %n, -16 @@ -782,18 +783,19 @@ ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB23_1: @ %vector.body.preheader ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r2, lsr #2 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB23_1: @ %vector.body +; CHECK-NEXT: .LBB23_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vptt.i32 ne, q0, zr ; CHECK-NEXT: vldrwt.u32 q1, [q0] ; CHECK-NEXT: vstrwt.32 q1, [r0], #16 -; CHECK-NEXT: le lr, .LBB23_1 -; CHECK-NEXT: @ %bb.2: @ %for.end +; CHECK-NEXT: le lr, .LBB23_2 +; CHECK-NEXT: @ %bb.3: @ %for.end ; CHECK-NEXT: pop {r7, pc} entry: %and = and i32 %n, -16 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll --- a/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll @@ -9,6 +9,7 @@ ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r4, pc} +; CHECK-NEXT: .LBB0_1: @ %vector.ph.preheader ; CHECK-NEXT: bic r12, r2, #3 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: sub.w lr, r12, #4 @@ -16,26 +17,26 @@ ; CHECK-NEXT: adr r3, .LCPI0_0 ; CHECK-NEXT: vldrw.u32 q0, [r3] ; CHECK-NEXT: vadd.i32 q0, q0, r0 -; CHECK-NEXT: .LBB0_1: @ %vector.ph +; CHECK-NEXT: .LBB0_2: @ %vector.ph ; CHECK-NEXT: @ =>This Loop Header: Depth=1 -; CHECK-NEXT: @ Child Loop BB0_2 Depth 2 +; CHECK-NEXT: @ Child Loop BB0_3 Depth 2 ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: dls lr, r4 -; CHECK-NEXT: .LBB0_2: @ %vector.body -; CHECK-NEXT: @ Parent Loop BB0_1 Depth=1 +; CHECK-NEXT: .LBB0_3: @ %vector.body +; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: vldrw.u32 q2, [q1, #16]! ; CHECK-NEXT: vstrb.8 q2, [r0], #16 -; CHECK-NEXT: le lr, .LBB0_2 -; CHECK-NEXT: @ %bb.3: @ %middle.block -; CHECK-NEXT: @ in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: le lr, .LBB0_3 +; CHECK-NEXT: @ %bb.4: @ %middle.block +; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: cmp r12, r2 -; CHECK-NEXT: bne .LBB0_1 -; CHECK-NEXT: @ %bb.4: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB0_2 +; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.5: +; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: .LCPI0_0: ; CHECK-NEXT: .long 4294967280 @ 0xfffffff0 ; CHECK-NEXT: .long 4294967284 @ 0xfffffff4 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll b/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll --- a/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll @@ -9,6 +9,7 @@ ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: mvn r3, #3 ; CHECK-NEXT: add.w r1, r3, r1, lsl #2 ; CHECK-NEXT: movs r3, #1 @@ -16,14 +17,14 @@ ; CHECK-NEXT: add.w lr, r3, r1, lsr #2 ; CHECK-NEXT: rsbs r1, r2, #0 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB0_1: @ %vector.body +; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vpte.s32 ge, q1, r2 ; CHECK-NEXT: vcmpt.s32 le, q1, r1 ; CHECK-NEXT: vstrwe.32 q0, [r0], #16 -; CHECK-NEXT: le lr, .LBB0_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: le lr, .LBB0_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %conv = zext i16 %N to i32 @@ -65,6 +66,7 @@ ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB1_1: @ %vector.ph ; CHECK-NEXT: mvn r3, #7 ; CHECK-NEXT: add.w r1, r3, r1, lsl #3 ; CHECK-NEXT: movs r3, #1 @@ -72,14 +74,14 @@ ; CHECK-NEXT: add.w lr, r3, r1, lsr #3 ; CHECK-NEXT: rsbs r1, r2, #0 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB1_1: @ %vector.body +; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q1, [r0] ; CHECK-NEXT: vpte.s16 ge, q1, r2 ; CHECK-NEXT: vcmpt.s16 le, q1, r1 ; CHECK-NEXT: vstrhe.16 q0, [r0], #16 -; CHECK-NEXT: le lr, .LBB1_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: le lr, .LBB1_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %conv2 = zext i16 %N to i32 @@ -121,6 +123,7 @@ ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB2_1: @ %vector.ph ; CHECK-NEXT: mvn r3, #15 ; CHECK-NEXT: add.w r1, r3, r1, lsl #4 ; CHECK-NEXT: movs r3, #1 @@ -128,14 +131,14 @@ ; CHECK-NEXT: add.w lr, r3, r1, lsr #4 ; CHECK-NEXT: rsbs r1, r2, #0 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB2_1: @ %vector.body +; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u8 q1, [r0] ; CHECK-NEXT: vpte.s8 ge, q1, r2 ; CHECK-NEXT: vcmpt.s8 le, q1, r1 ; CHECK-NEXT: vstrbe.8 q0, [r0], #16 -; CHECK-NEXT: le lr, .LBB2_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: le lr, .LBB2_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %conv2 = zext i16 %N to i32 @@ -177,6 +180,7 @@ ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB3_1: @ %vector.ph ; CHECK-NEXT: mvn r2, #3 ; CHECK-NEXT: add.w r1, r2, r1, lsl #2 ; CHECK-NEXT: movs r2, #1 @@ -185,14 +189,14 @@ ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: eor r2, r1, #-2147483648 -; CHECK-NEXT: .LBB3_1: @ %vector.body +; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vpte.f32 ge, q1, r1 ; CHECK-NEXT: vcmpt.f32 le, q1, r2 ; CHECK-NEXT: vstrwe.32 q0, [r0], #16 -; CHECK-NEXT: le lr, .LBB3_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: le lr, .LBB3_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %conv = zext i16 %N to i32 @@ -234,6 +238,7 @@ ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB4_1: @ %vector.ph ; CHECK-NEXT: mvn r3, #7 ; CHECK-NEXT: add.w r1, r3, r1, lsl #3 ; CHECK-NEXT: vmov r2, s0 @@ -243,14 +248,14 @@ ; CHECK-NEXT: vmov.f16 r1, s0 ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB4_1: @ %vector.body +; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q1, [r0] ; CHECK-NEXT: vpte.f16 ge, q1, r2 ; CHECK-NEXT: vcmpt.f16 le, q1, r1 ; CHECK-NEXT: vstrhe.16 q0, [r0], #16 -; CHECK-NEXT: le lr, .LBB4_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: le lr, .LBB4_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %0 = bitcast float %T.coerce to i32 @@ -297,6 +302,7 @@ ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB5_1: @ %vector.ph ; CHECK-NEXT: mvn r3, #3 ; CHECK-NEXT: add.w r1, r3, r1, lsl #2 ; CHECK-NEXT: movs r3, #1 @@ -304,14 +310,14 @@ ; CHECK-NEXT: add.w lr, r3, r1, lsr #2 ; CHECK-NEXT: rsbs r1, r2, #0 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB5_1: @ %vector.body +; CHECK-NEXT: .LBB5_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vpte.s32 ge, q1, r2 ; CHECK-NEXT: vcmpt.s32 le, q1, r1 ; CHECK-NEXT: vstrwe.32 q0, [r0], #16 -; CHECK-NEXT: le lr, .LBB5_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: le lr, .LBB5_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %conv = zext i16 %N to i32 @@ -353,6 +359,7 @@ ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB6_1: @ %vector.ph ; CHECK-NEXT: mvn r3, #7 ; CHECK-NEXT: add.w r1, r3, r1, lsl #3 ; CHECK-NEXT: movs r3, #1 @@ -360,14 +367,14 @@ ; CHECK-NEXT: add.w lr, r3, r1, lsr #3 ; CHECK-NEXT: rsbs r1, r2, #0 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB6_1: @ %vector.body +; CHECK-NEXT: .LBB6_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q1, [r0] ; CHECK-NEXT: vpte.s16 ge, q1, r2 ; CHECK-NEXT: vcmpt.s16 le, q1, r1 ; CHECK-NEXT: vstrhe.16 q0, [r0], #16 -; CHECK-NEXT: le lr, .LBB6_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: le lr, .LBB6_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %conv2 = zext i16 %N to i32 @@ -409,6 +416,7 @@ ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB7_1: @ %vector.ph ; CHECK-NEXT: mvn r3, #15 ; CHECK-NEXT: add.w r1, r3, r1, lsl #4 ; CHECK-NEXT: movs r3, #1 @@ -416,14 +424,14 @@ ; CHECK-NEXT: add.w lr, r3, r1, lsr #4 ; CHECK-NEXT: rsbs r1, r2, #0 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB7_1: @ %vector.body +; CHECK-NEXT: .LBB7_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u8 q1, [r0] ; CHECK-NEXT: vpte.s8 ge, q1, r2 ; CHECK-NEXT: vcmpt.s8 le, q1, r1 ; CHECK-NEXT: vstrbe.8 q0, [r0], #16 -; CHECK-NEXT: le lr, .LBB7_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: le lr, .LBB7_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %conv2 = zext i16 %N to i32 @@ -465,6 +473,7 @@ ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB8_1: @ %vector.ph ; CHECK-NEXT: mvn r2, #3 ; CHECK-NEXT: add.w r1, r2, r1, lsl #2 ; CHECK-NEXT: movs r2, #1 @@ -473,14 +482,14 @@ ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: eor r2, r1, #-2147483648 -; CHECK-NEXT: .LBB8_1: @ %vector.body +; CHECK-NEXT: .LBB8_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vpte.f32 ge, q1, r1 ; CHECK-NEXT: vcmpt.f32 le, q1, r2 ; CHECK-NEXT: vstrwe.32 q0, [r0], #16 -; CHECK-NEXT: le lr, .LBB8_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: le lr, .LBB8_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %conv = zext i16 %N to i32 @@ -522,6 +531,7 @@ ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} +; CHECK-NEXT: .LBB9_1: @ %vector.ph ; CHECK-NEXT: mvn r3, #7 ; CHECK-NEXT: add.w r1, r3, r1, lsl #3 ; CHECK-NEXT: vmov r2, s0 @@ -531,14 +541,14 @@ ; CHECK-NEXT: vmov.f16 r1, s0 ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB9_1: @ %vector.body +; CHECK-NEXT: .LBB9_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q1, [r0] ; CHECK-NEXT: vpte.f16 ge, q1, r2 ; CHECK-NEXT: vcmpt.f16 le, q1, r1 ; CHECK-NEXT: vstrhe.16 q0, [r0], #16 -; CHECK-NEXT: le lr, .LBB9_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: le lr, .LBB9_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: %0 = bitcast float %T.coerce to i32 diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll --- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -1537,6 +1537,7 @@ ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} +; CHECK-NEXT: .LBB8_1: @ %vector.ph ; CHECK-NEXT: add.w r12, r3, #3 ; CHECK-NEXT: adr r4, .LCPI8_0 ; CHECK-NEXT: bic r12, r12, #3 @@ -1548,7 +1549,7 @@ ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vdup.32 q1, r12 ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB8_1: @ %vector.body +; CHECK-NEXT: .LBB8_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vdup.32 q2, r3 ; CHECK-NEXT: adds r3, #4 @@ -1561,11 +1562,11 @@ ; CHECK-NEXT: vmovlb.s16 q2, q2 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.32 q2, [r2], #8 -; CHECK-NEXT: le lr, .LBB8_1 -; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: le lr, .LBB8_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: @ %bb.4: ; CHECK-NEXT: .LCPI8_0: ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 1 @ 0x1 diff --git a/llvm/test/CodeGen/Thumb2/mve-selectcc.ll b/llvm/test/CodeGen/Thumb2/mve-selectcc.ll --- a/llvm/test/CodeGen/Thumb2/mve-selectcc.ll +++ b/llvm/test/CodeGen/Thumb2/mve-selectcc.ll @@ -7,6 +7,7 @@ ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB0_1: @ %select.false ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: @@ -21,6 +22,7 @@ ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB1_1: @ %select.false ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: @@ -35,6 +37,7 @@ ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB2_1: @ %select.false ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: @@ -49,6 +52,7 @@ ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB3_1: @ %select.false ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: @@ -63,6 +67,7 @@ ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB4_1: @ %select.false ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: @@ -77,6 +82,7 @@ ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB5_1: @ %select.false ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: @@ -91,6 +97,7 @@ ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: bxeq lr +; CHECK-NEXT: .LBB6_1: @ %select.false ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: diff --git a/llvm/test/DebugInfo/MIR/ARM/subregister-full-piece.mir b/llvm/test/DebugInfo/MIR/ARM/subregister-full-piece.mir --- a/llvm/test/DebugInfo/MIR/ARM/subregister-full-piece.mir +++ b/llvm/test/DebugInfo/MIR/ARM/subregister-full-piece.mir @@ -1,24 +1,53 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -start-after=livedebugvalues -filetype=obj -o - %s | \ # RUN: llvm-dwarfdump - | FileCheck %s # # This tests the edge-case where a complex fragment has exactly # the size of a subregister of the register the DBG_VALUE points to. # -# CHECK: .debug_info contents: + +# CHECK: DW_TAG_compile_unit +# CHECK: DW_AT_producer ("") +# CHECK: DW_AT_language (DW_LANG_C_plus_plus_14) +# CHECK: DW_AT_name ("t.cpp") +# CHECK: DW_AT_stmt_list (0x00000000) +# CHECK: DW_AT_comp_dir ("/") +# CHECK: DW_AT_APPLE_optimized (true) +# CHECK: DW_AT_low_pc (0x0000000000000000) +# CHECK: DW_AT_high_pc (0x0000000000000008) + +# CHECK: DW_TAG_subprogram +# CHECK: DW_AT_low_pc (0x0000000000000000) +# CHECK: DW_AT_high_pc (0x0000000000000008) +# CHECK: DW_AT_APPLE_omit_frame_ptr (true) +# CHECK: DW_AT_frame_base (DW_OP_reg13 SP) +# CHECK: DW_AT_name ("f") +# CHECK: DW_AT_decl_file ("/t.cpp") +# CHECK: DW_AT_decl_line (1) +# CHECK: DW_AT_external (true) +# CHECK: DW_AT_APPLE_isa (0x01) + # CHECK: DW_TAG_variable -# CHECK-NOT: DW_TAG -# CHECK: DW_AT_location -# Q8 = {D16, D17} -# CHECK-NEXT: DW_OP_regx D16, DW_OP_piece 0x8) -# CHECK-NOT: DW_TAG -# CHECK: DW_AT_name ("q8") +# CHECK: DW_AT_location (DW_OP_regx D16, DW_OP_piece 0x8) +# CHECK: DW_AT_name ("q8") +# CHECK: DW_AT_decl_file ("/t.cpp") +# CHECK: DW_AT_decl_line (1) +# CHECK: DW_AT_type (0x0000005b "uint8x8x2_t") + # CHECK: DW_TAG_variable -# CHECK-NOT: DW_TAG -# CHECK: DW_AT_location -# Q9 = {D18, D19} -# CHECK-NEXT: DW_OP_regx D18, DW_OP_piece 0x7) -# CHECK-NOT: DW_TAG -# CHECK: DW_AT_name ("q9") +# CHECK: DW_AT_location (DW_OP_regx D18, DW_OP_piece 0x7) +# CHECK: DW_AT_name ("q9") +# CHECK: DW_AT_decl_file ("/t.cpp") +# CHECK: DW_AT_decl_line (1) +# CHECK: DW_AT_type (0x0000005b "uint8x8x2_t") + +# CHECK: DW_TAG_structure_type +# CHECK: DW_AT_calling_convention (DW_CC_pass_by_value) +# CHECK: DW_AT_name ("uint8x8x2_t") +# CHECK: DW_AT_byte_size (0x10) +# CHECK: DW_AT_decl_file ("/t.cpp") +# CHECK: DW_AT_decl_line (113) + --- | target triple = "thumbv7s-apple-ios" @@ -45,7 +74,7 @@ liveins: $r2 t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr - t2Bcc %bb.2.for.body, 0, killed $cpsr, debug-location !20 DBG_VALUE $q8, $noreg, !8, !DIExpression(DW_OP_LLVM_fragment, 0, 64), debug-location !20 DBG_VALUE $q9, $noreg, !9, !DIExpression(DW_OP_LLVM_fragment, 0, 56), debug-location !20 + t2Bcc %bb.2.for.body, 0, killed $cpsr, debug-location !20 tB %bb.2.for.body, 14, $noreg