Index: llvm/lib/CodeGen/PeepholeOptimizer.cpp
===================================================================
--- llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -662,7 +662,12 @@
 /// Check if a simpler conditional branch can be generated.
 bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {
-  return TII->optimizeCondBranch(MI);
+  LLVM_DEBUG(dbgs() << "Attempting to optimize conditional branch: " << MI);
+  if (TII->optimizeCondBranch(MI)) {
+    LLVM_DEBUG(dbgs() << "  -> Successfully optimized conditional branch!\n");
+    return true;
+  }
+  return false;
 }
 
 /// Try to find the next source that share the same register file
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1267,15 +1267,22 @@
 /// on the path.
 static bool areCFlagsAccessedBetweenInstrs(
     MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
-    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
+    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All,
+    MachineInstr **Accessor = nullptr) {
   // Early exit if To is at the beginning of the BB.
-  if (To == To->getParent()->begin())
+  if (To == To->getParent()->begin()) {
+    if (Accessor)
+      *Accessor = nullptr;
     return true;
+  }
 
   // Check whether the instructions are in the same basic block
   // If not, assume the condition flags might get modified somewhere.
-  if (To->getParent() != From->getParent())
+  if (To->getParent() != From->getParent()) {
+    if (Accessor)
+      *Accessor = nullptr;
     return true;
+  }
 
   // From must be above To.
   assert(std::any_of(
@@ -1283,12 +1290,16 @@
       [From](MachineInstr &MI) { return MI.getIterator() == From; }));
 
   // We iterate backward starting at \p To until we hit \p From.
-  for (const MachineInstr &Instr :
+  for (MachineInstr &Instr :
        instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
     if (((AccessToCheck & AK_Write) &&
          Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
-        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
+        ((AccessToCheck & AK_Read) &&
+         Instr.readsRegister(AArch64::NZCV, TRI))) {
+      if (Accessor)
+        *Accessor = &Instr;
       return true;
+    }
   }
   return false;
 }
@@ -6792,6 +6803,19 @@
   MI->setFlags(Flags);
 }
 
+// Return true if \p MI reads or writes any physical register other than
+// NZCV. Such an instruction is not safe to move, since the physical
+// register may be live at the new location.
+static bool accessesNonFlagsPhysicalRegister(MachineInstr *MI) {
+  for (const MachineOperand &MO : MI->operands()) {
+    if (!MO.isReg())
+      continue;
+    Register Reg = MO.getReg();
+    if (Reg == 0)
+      continue;
+    if (Reg.isPhysical() && Reg != AArch64::NZCV)
+      return true;
+  }
+  return false;
+}
+
 /// Replace csincr-branch sequence by simple conditional branch
 ///
 /// Examples:
@@ -6949,11 +6973,63 @@
     return false;
 
   AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
-  // Convert only when the condition code is not modified between
-  // the CSINC and the branch. The CC may be used by other
-  // instructions in between.
-  if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
-    return false;
+  // Check to see if the condition code is modified between the CSINC and the
+  // branch. If so, we may still be able to apply the optimisation by first
+  // moving the intervening instruction above the flag-setting operation that
+  // the CSINC depends upon, i.e.
+  //
+  //   Instr1 (implicit-def NZCV)
+  //   ...
+  //   CSINC
+  //   ...
+  //   Instr2 (implicit-def NZCV)   -> move above Instr1 if safe
+  //   ...
+  //   TBNZW
+  MachineInstr *Accessor = nullptr;
+  if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write,
+                                     &Accessor)) {
+    // areCFlagsAccessedBetweenInstrs walks backwards from MI to DefMI and
+    // bails out at the first flag-setting operation (Accessor) it finds.
+    if (Accessor &&
+        // Don't move memory ops for now, or ones that access physical
+        // registers. Also, we can't move Accessor if the NZCV flags it sets
+        // are alive in the block successors.
+        !Accessor->mayLoadOrStore() &&
+        !accessesNonFlagsPhysicalRegister(Accessor) &&
+        !areCFlagsAliveInSuccessors(MBB)) {
+      // We need to find the instruction that sets the NZCV flags that the
+      // CSINC depends upon.
+      const TargetRegisterInfo *TRI = &getRegisterInfo();
+      MachineBasicBlock::iterator DefIt = DefMI;
+      while (!DefIt->modifiesRegister(AArch64::NZCV, TRI)) {
+        --DefIt;
+      }
+
+      MachineInstr *FlagsDefMI = &(*DefIt);
+      for (MachineInstr &Instr : instructionsWithoutDebug(
+               FlagsDefMI->getIterator(), Accessor->getIterator())) {
+        // If another instruction also writes to the NZCV flags in between,
+        // we should bail out because otherwise we'd have to move both!
+        if (FlagsDefMI->getIterator() != Instr.getIterator() &&
+            Instr.modifiesRegister(AArch64::NZCV, TRI))
+          return false;
+        for (MachineOperand &MO : Accessor->operands()) {
+          if (!MO.isReg() || MO.isDef())
+            continue;
+          // We can't move Accessor above the instruction that defines one of
+          // its inputs!
+          if (Instr.modifiesRegister(MO.getReg()))
+            return false;
+        }
+      }
+
+      // It's now safe to move this above the flag-setting op that feeds into
+      // the CSINC!
+      Accessor->moveBefore(FlagsDefMI);
+    } else
+      return false;
+  }
+
   MachineBasicBlock &RefToMBB = *MBB;
   MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
   DebugLoc DL = MI.getDebugLoc();
Index: llvm/test/CodeGen/AArch64/active_lane_mask.ll
===================================================================
--- llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -525,6 +525,109 @@
 }
 
+; == Test for vectorised loops using multiple lane masks for control flow ==
+
+define void @loop_with_two_lane_masks_nxv16i1(ptr noalias nocapture noundef writeonly %dst, i64 noundef %n) {
+; CHECK-LABEL: loop_with_two_lane_masks_nxv16i1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    pfalse p0.b
+; CHECK-NEXT:    mov p1.b, p0.b
+; CHECK-NEXT:    rdvl x9, #2
+; CHECK-NEXT:    rdvl x10, #3
+; CHECK-NEXT:    addvl x11, x0, #1
+; CHECK-NEXT:    mov z0.b, #0 // =0x0
+; CHECK-NEXT:  .LBB32_1: // %vector.body
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    add x12, x10, x8
+; CHECK-NEXT:    add x13, x8, x9
+; CHECK-NEXT:    st1b { z0.b }, p0, [x0, x8]
+; CHECK-NEXT:    st1b { z0.b }, p1, [x11, x8]
+; CHECK-NEXT:    mov x8, x13
+; CHECK-NEXT:    whilelo p1.b, x12, x1
+; CHECK-NEXT:    whilelo p0.b, x13, x1
+; CHECK-NEXT:    b.mi .LBB32_1
+; CHECK-NEXT:  // %bb.2: // %for.end
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.vscale.i64()
+  %1 = shl nuw nsw i64 %0, 4
+  %2 = shl nuw nsw i64 %0, 5
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %active.lane.mask = phi <vscale x 16 x i1> [ zeroinitializer, %entry ], [ %active.lane.mask.next, %vector.body ]
+  %active.lane.mask11 = phi <vscale x 16 x i1> [ zeroinitializer, %entry ], [ %active.lane.mask.next13, %vector.body ]
+  %dst1 = getelementptr inbounds i8, ptr %dst, i64 %index
+  tail call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> zeroinitializer, ptr %dst1, i32 1, <vscale x 16 x i1> %active.lane.mask)
+  %dst2 = getelementptr inbounds i8, ptr %dst1, i64 %1
+  tail call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> zeroinitializer, ptr %dst2, i32 1, <vscale x 16 x i1> %active.lane.mask11)
+  %index.next = add i64 %index, %2
+  %new.index = add i64 %index.next, %1
+  %active.lane.mask.next = tail call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 %index.next, i64 %n)
+  %active.lane.mask.next13 = tail call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 %new.index, i64 %n)
+  %bit = extractelement <vscale x 16 x i1> %active.lane.mask.next, i64 0
+  br i1 %bit, label %vector.body, label %for.end
+
+for.end:
+  ret void
+}
+
+define void @loop_with_two_lane_masks_nxv4i1(ptr noalias nocapture noundef writeonly %dst, i64 noundef %n) {
+; CHECK-LABEL: loop_with_two_lane_masks_nxv4i1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    pfalse p0.b
+; CHECK-NEXT:    mov p1.b, p0.b
+; CHECK-NEXT:    rdvl x9, #2
+; CHECK-NEXT:    rdvl x10, #3
+; CHECK-NEXT:    addvl x11, x0, #1
+; CHECK-NEXT:    mov z0.s, #0 // =0x0
+; CHECK-NEXT:  .LBB33_1: // %vector.body
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    add x12, x0, x8
+; CHECK-NEXT:    add x13, x11, x8
+; CHECK-NEXT:    add x14, x10, x8
+; CHECK-NEXT:    add x15, x8, x9
+; CHECK-NEXT:    mov x8, x15
+; CHECK-NEXT:    st1w { z0.s }, p0, [x12]
+; CHECK-NEXT:    st1w { z0.s }, p1, [x13]
+; CHECK-NEXT:    whilelo p1.s, x14, x1
+; CHECK-NEXT:    whilelo p0.s, x15, x1
+; CHECK-NEXT:    b.mi .LBB33_1
+; CHECK-NEXT:  // %bb.2: // %for.end
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.vscale.i64()
+  %1 = shl nuw nsw i64 %0, 4
+  %2 = shl nuw nsw i64 %0, 5
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %active.lane.mask = phi <vscale x 4 x i1> [ zeroinitializer, %entry ], [ %active.lane.mask.next, %vector.body ]
+  %active.lane.mask11 = phi <vscale x 4 x i1> [ zeroinitializer, %entry ], [ %active.lane.mask.next13, %vector.body ]
+  %dst1 = getelementptr inbounds i8, ptr %dst, i64 %index
+  tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> zeroinitializer, ptr %dst1, i32 1, <vscale x 4 x i1> %active.lane.mask)
+  %dst2 = getelementptr inbounds i8, ptr %dst1, i64 %1
+  tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> zeroinitializer, ptr %dst2, i32 1, <vscale x 4 x i1> %active.lane.mask11)
+  %index.next = add i64 %index, %2
+  %new.index = add i64 %index.next, %1
+  %active.lane.mask.next = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %index.next, i64 %n)
+  %active.lane.mask.next13 = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %new.index, i64 %n)
+  %bit = extractelement <vscale x 4 x i1> %active.lane.mask.next, i64 0
+  br i1 %bit, label %vector.body, label %for.end
+
+for.end:
+  ret void
+}
+
+declare i64 @llvm.vscale.i64()
+declare void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8>, ptr nocapture, i32 immarg, <vscale x 16 x i1>)
+declare void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32>, ptr nocapture, i32 immarg, <vscale x 4 x i1>)
+
 declare <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i32(i32, i32)
 declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32, i32)
 declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32, i32)
Index: llvm/test/CodeGen/AArch64/sve-loop-two-whiles1.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-loop-two-whiles1.mir
@@ -0,0 +1,83 @@
+# REQUIRES: asserts
+# RUN: llc -mtriple=aarch64 -mattr=+sve -run-pass=peephole-opt \
+# RUN:   -verify-machineinstrs -debug-only=peephole-opt %s -o - 2> %t | FileCheck %s
+# RUN: cat %t | FileCheck %s --check-prefix=DEBUG
+
+---
+# CHECK: name: foo
+# CHECK-DAG: %8:ppr = WHILELO_PXX_B killed %19, %10, implicit-def dead $nzcv
+# CHECK-NEXT: %7:ppr = WHILELO_PXX_B %18, %10, implicit-def $nzcv
+# CHECK-NEXT: ST1B %17, %4, %9, %3 :: (store unknown-size, align 1)
+# CHECK-NEXT: %21:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
+# CHECK-NEXT: ST1B %17, %5, %2, %3 :: (store unknown-size, align 1)
+# CHECK-NEXT: Bcc 4, %bb.1, implicit $nzcv
+
+# DEBUG: Attempting to optimize conditional branch: TBNZW killed %21:gpr32, 0, %bb.1
+# DEBUG-NEXT: Successfully optimized conditional branch!
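+
+# Motivating case: the second WHILELO clobbers NZCV between the CSINC and
+# the TBNZW, but it has no memory operands and only touches virtual
+# registers (besides its dead NZCV def), so it can be hoisted above the
+# flag-setting WHILELO that feeds the CSINC. The CSINC/TBNZW pair then
+# folds into a single Bcc on those flags.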
+name: foo
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64, preferred-register: '' }
+  - { id: 1, class: gpr64, preferred-register: '' }
+  - { id: 2, class: gpr64sp, preferred-register: '' }
+  - { id: 3, class: gpr64common, preferred-register: '' }
+  - { id: 4, class: ppr_3b, preferred-register: '' }
+  - { id: 5, class: ppr_3b, preferred-register: '' }
+  - { id: 6, class: gpr64all, preferred-register: '' }
+  - { id: 7, class: ppr, preferred-register: '' }
+  - { id: 8, class: ppr, preferred-register: '' }
+  - { id: 9, class: gpr64common, preferred-register: '' }
+  - { id: 10, class: gpr64, preferred-register: '' }
+  - { id: 11, class: gpr64all, preferred-register: '' }
+  - { id: 12, class: ppr, preferred-register: '' }
+  - { id: 13, class: gpr64, preferred-register: '' }
+  - { id: 14, class: gpr64, preferred-register: '' }
+  - { id: 15, class: gpr64sp, preferred-register: '' }
+  - { id: 16, class: gpr64all, preferred-register: '' }
+  - { id: 17, class: zpr, preferred-register: '' }
+  - { id: 18, class: gpr64, preferred-register: '' }
+  - { id: 19, class: gpr64, preferred-register: '' }
+  - { id: 20, class: ppr, preferred-register: '' }
+  - { id: 21, class: gpr32, preferred-register: '' }
+liveins:
+  - { reg: '$x0', virtual-reg: '%9' }
+  - { reg: '$x1', virtual-reg: '%10' }
+body: |
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+    liveins: $x0, $x1
+
+    %10:gpr64 = COPY $x1
+    %9:gpr64common = COPY $x0
+    %13:gpr64 = RDVLI_XI 2
+    %14:gpr64 = RDVLI_XI 3
+    %15:gpr64sp = ADDVL_XXI %9, 1
+    %12:ppr = PFALSE
+    %16:gpr64all = COPY $xzr
+    %11:gpr64all = COPY %16
+    %2:gpr64sp = COPY %15
+    %17:zpr = DUP_ZI_B 0, 0
+    %20:ppr = PTRUE_B 31
+
+  bb.1:
+    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+
+    %3:gpr64common = PHI %11, %bb.0, %6, %bb.1
+    %4:ppr_3b = PHI %12, %bb.0, %7, %bb.1
+    %5:ppr_3b = PHI %12, %bb.0, %8, %bb.1
+    %18:gpr64 = ADDXrr %3, %13
+    %6:gpr64all = COPY %18
+    %19:gpr64 = ADDXrr %14, %3
+    %7:ppr = WHILELO_PXX_B %18, %10, implicit-def dead $nzcv
+    PTEST_PP %20, %7, implicit-def $nzcv
+    ST1B %17, %4, %9, %3 :: (store unknown-size, align 1)
+    %21:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
+    ST1B %17, %5, %2, %3 :: (store unknown-size, align 1)
+    %8:ppr = WHILELO_PXX_B killed %19, %10, implicit-def dead $nzcv
+    TBNZW killed %21, 0, %bb.1
+    B %bb.2
+
+  bb.2:
+    RET_ReallyLR
+
+...
Index: llvm/test/CodeGen/AArch64/sve-loop-two-whiles2.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-loop-two-whiles2.mir
@@ -0,0 +1,82 @@
+# REQUIRES: asserts
+# RUN: llc -mtriple=aarch64 -mattr=+sve -run-pass=peephole-opt \
+# RUN:   -verify-machineinstrs -debug-only=peephole-opt %s -o - 2> %t | FileCheck %s
+# RUN: cat %t | FileCheck %s --check-prefix=DEBUG
+
+---
+# CHECK: name: foo
+# CHECK-DAG: %7:ppr = WHILELO_PXX_B %18, %10, implicit-def $nzcv
+# CHECK-NEXT: %21:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
+# CHECK-NEXT: $p0 = WHILELO_PXX_B killed %19, %10, implicit-def dead $nzcv
+# CHECK-NEXT: %8:ppr = COPY $p0
+# CHECK-NEXT: TBNZW killed %21, 0, %bb.1
+
+# DEBUG-NOT: Successfully optimized conditional branch!
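+
+# Negative test: the intervening flag-clobbering WHILELO writes directly to
+# the physical register $p0, so accessesNonFlagsPhysicalRegister() should
+# reject it and the TBNZW must be left unchanged.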
+name: foo
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64, preferred-register: '' }
+  - { id: 1, class: gpr64, preferred-register: '' }
+  - { id: 2, class: gpr64sp, preferred-register: '' }
+  - { id: 3, class: gpr64common, preferred-register: '' }
+  - { id: 4, class: ppr_3b, preferred-register: '' }
+  - { id: 5, class: ppr_3b, preferred-register: '' }
+  - { id: 6, class: gpr64all, preferred-register: '' }
+  - { id: 7, class: ppr, preferred-register: '' }
+  - { id: 8, class: ppr, preferred-register: '' }
+  - { id: 9, class: gpr64common, preferred-register: '' }
+  - { id: 10, class: gpr64, preferred-register: '' }
+  - { id: 11, class: gpr64all, preferred-register: '' }
+  - { id: 12, class: ppr, preferred-register: '' }
+  - { id: 13, class: gpr64, preferred-register: '' }
+  - { id: 14, class: gpr64, preferred-register: '' }
+  - { id: 15, class: gpr64sp, preferred-register: '' }
+  - { id: 16, class: gpr64all, preferred-register: '' }
+  - { id: 17, class: zpr, preferred-register: '' }
+  - { id: 18, class: gpr64, preferred-register: '' }
+  - { id: 19, class: gpr64, preferred-register: '' }
+  - { id: 20, class: ppr, preferred-register: '' }
+  - { id: 21, class: gpr32, preferred-register: '' }
+liveins:
+  - { reg: '$x0', virtual-reg: '%9' }
+  - { reg: '$x1', virtual-reg: '%10' }
+body: |
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+    liveins: $x0, $x1
+
+    %10:gpr64 = COPY $x1
+    %9:gpr64common = COPY $x0
+    %13:gpr64 = RDVLI_XI 2
+    %14:gpr64 = RDVLI_XI 3
+    %15:gpr64sp = ADDVL_XXI %9, 1
+    %12:ppr = PFALSE
+    %16:gpr64all = COPY $xzr
+    %11:gpr64all = COPY %16
+    %2:gpr64sp = COPY %15
+    %17:zpr = DUP_ZI_B 0, 0
+    %20:ppr = PTRUE_B 31
+
+  bb.1:
+    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+
+    %3:gpr64common = PHI %11, %bb.0, %6, %bb.1
+    %4:ppr_3b = PHI %12, %bb.0, %7, %bb.1
+    %5:ppr_3b = PHI %12, %bb.0, %8, %bb.1
+    ST1B %17, %4, %9, %3 :: (store unknown-size, align 1)
+    ST1B %17, %5, %2, %3 :: (store unknown-size, align 1)
+    %18:gpr64 = ADDXrr %3, %13
+    %6:gpr64all = COPY %18
+    %19:gpr64 = ADDXrr %14, %3
+    %7:ppr = WHILELO_PXX_B %18, %10, implicit-def dead $nzcv
+    PTEST_PP %20, %7, implicit-def $nzcv
+    %21:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
+    $p0 = WHILELO_PXX_B killed %19, %10, implicit-def dead $nzcv
+    %8:ppr = COPY $p0
+    TBNZW killed %21, 0, %bb.1
+    B %bb.2
+
+  bb.2:
+    RET_ReallyLR
+
+...
Index: llvm/test/CodeGen/AArch64/sve-loop-two-whiles3.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-loop-two-whiles3.mir
@@ -0,0 +1,81 @@
+# REQUIRES: asserts
+# RUN: llc -mtriple=aarch64 -mattr=+sve -run-pass=peephole-opt \
+# RUN:   -verify-machineinstrs -debug-only=peephole-opt %s -o - 2> %t | FileCheck %s
+# RUN: cat %t | FileCheck %s --check-prefix=DEBUG
+
+---
+# CHECK: name: foo
+# CHECK-DAG: %8:ppr = WHILELO_PXX_B killed %19, %10, implicit-def dead $nzcv
+# CHECK-NEXT: %7:ppr = WHILELO_PXX_B %18, %10, implicit-def $nzcv
+# CHECK-NEXT: %21:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
+# CHECK-NEXT: Bcc 4, %bb.1, implicit $nzcv
+
+# DEBUG: Attempting to optimize conditional branch: TBNZW killed %21:gpr32, 0, %bb.1
+# DEBUG-NEXT: Successfully optimized conditional branch!
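+
+# Positive test: like sve-loop-two-whiles1.mir, but the stores come before
+# the flag-setting instructions, so the only instruction between the CSINC
+# and the TBNZW is the second WHILELO. It should be hoisted and the branch
+# folded into a Bcc.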
+name: foo
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64, preferred-register: '' }
+  - { id: 1, class: gpr64, preferred-register: '' }
+  - { id: 2, class: gpr64sp, preferred-register: '' }
+  - { id: 3, class: gpr64common, preferred-register: '' }
+  - { id: 4, class: ppr_3b, preferred-register: '' }
+  - { id: 5, class: ppr_3b, preferred-register: '' }
+  - { id: 6, class: gpr64all, preferred-register: '' }
+  - { id: 7, class: ppr, preferred-register: '' }
+  - { id: 8, class: ppr, preferred-register: '' }
+  - { id: 9, class: gpr64common, preferred-register: '' }
+  - { id: 10, class: gpr64, preferred-register: '' }
+  - { id: 11, class: gpr64all, preferred-register: '' }
+  - { id: 12, class: ppr, preferred-register: '' }
+  - { id: 13, class: gpr64, preferred-register: '' }
+  - { id: 14, class: gpr64, preferred-register: '' }
+  - { id: 15, class: gpr64sp, preferred-register: '' }
+  - { id: 16, class: gpr64all, preferred-register: '' }
+  - { id: 17, class: zpr, preferred-register: '' }
+  - { id: 18, class: gpr64, preferred-register: '' }
+  - { id: 19, class: gpr64, preferred-register: '' }
+  - { id: 20, class: ppr, preferred-register: '' }
+  - { id: 21, class: gpr32, preferred-register: '' }
+liveins:
+  - { reg: '$x0', virtual-reg: '%9' }
+  - { reg: '$x1', virtual-reg: '%10' }
+body: |
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+    liveins: $x0, $x1
+
+    %10:gpr64 = COPY $x1
+    %9:gpr64common = COPY $x0
+    %13:gpr64 = RDVLI_XI 2
+    %14:gpr64 = RDVLI_XI 3
+    %15:gpr64sp = ADDVL_XXI %9, 1
+    %12:ppr = PFALSE
+    %16:gpr64all = COPY $xzr
+    %11:gpr64all = COPY %16
+    %2:gpr64sp = COPY %15
+    %17:zpr = DUP_ZI_B 0, 0
+    %20:ppr = PTRUE_B 31
+
+  bb.1:
+    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+
+    %3:gpr64common = PHI %11, %bb.0, %6, %bb.1
+    %4:ppr_3b = PHI %12, %bb.0, %7, %bb.1
+    %5:ppr_3b = PHI %12, %bb.0, %8, %bb.1
+    ST1B %17, %4, %9, %3 :: (store unknown-size, align 1)
+    ST1B %17, %5, %2, %3 :: (store unknown-size, align 1)
+    %18:gpr64 = ADDXrr %3, %13
+    %6:gpr64all = COPY %18
+    %19:gpr64 = ADDXrr %14, %3
+    %7:ppr = WHILELO_PXX_B %18, %10, implicit-def dead $nzcv
+    PTEST_PP %20, %7, implicit-def $nzcv
+    %21:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
+    %8:ppr = WHILELO_PXX_B killed %19, %10, implicit-def dead $nzcv
+    TBNZW killed %21, 0, %bb.1
+    B %bb.2
+
+  bb.2:
+    RET_ReallyLR
+
+...
Index: llvm/test/CodeGen/AArch64/sve-loop-two-whiles4.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-loop-two-whiles4.mir
@@ -0,0 +1,86 @@
+# REQUIRES: asserts
+# RUN: llc -mtriple=aarch64 -mattr=+sve -run-pass=peephole-opt \
+# RUN:   -verify-machineinstrs -debug-only=peephole-opt %s -o - 2> %t | FileCheck %s
+# RUN: cat %t | FileCheck %s --check-prefix=DEBUG
+
+---
+# CHECK: name: foo
+# CHECK-DAG: %7:ppr = WHILELO_PXX_B %18, %10, implicit-def $nzcv
+# CHECK-NEXT: %21:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
+# CHECK-NEXT: %8:ppr = WHILELO_PXX_B %19, %10, implicit-def dead $nzcv
+# CHECK-NEXT: %23:ppr = WHILELO_PXX_B killed %18, %19, implicit-def dead $nzcv
+# CHECK-NEXT: TBNZW killed %21, 0, %bb.1
+
+# DEBUG-NOT: Successfully optimized conditional branch!
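+
+# Negative test: two WHILELOs clobber NZCV between the CSINC and the TBNZW.
+# Hoisting the nearest one (the WHILELO defining %23) would still leave the
+# flags clobbered by the WHILELO defining %8, so the transformation should
+# bail out.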
+name: foo
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64, preferred-register: '' }
+  - { id: 1, class: gpr64, preferred-register: '' }
+  - { id: 2, class: gpr64sp, preferred-register: '' }
+  - { id: 3, class: gpr64common, preferred-register: '' }
+  - { id: 4, class: ppr_3b, preferred-register: '' }
+  - { id: 5, class: ppr_3b, preferred-register: '' }
+  - { id: 6, class: gpr64all, preferred-register: '' }
+  - { id: 7, class: ppr, preferred-register: '' }
+  - { id: 8, class: ppr, preferred-register: '' }
+  - { id: 9, class: gpr64common, preferred-register: '' }
+  - { id: 10, class: gpr64, preferred-register: '' }
+  - { id: 11, class: gpr64all, preferred-register: '' }
+  - { id: 12, class: ppr, preferred-register: '' }
+  - { id: 13, class: gpr64, preferred-register: '' }
+  - { id: 14, class: gpr64, preferred-register: '' }
+  - { id: 15, class: gpr64sp, preferred-register: '' }
+  - { id: 16, class: gpr64all, preferred-register: '' }
+  - { id: 17, class: zpr, preferred-register: '' }
+  - { id: 18, class: gpr64, preferred-register: '' }
+  - { id: 19, class: gpr64, preferred-register: '' }
+  - { id: 20, class: ppr, preferred-register: '' }
+  - { id: 21, class: gpr32, preferred-register: '' }
+  - { id: 22, class: ppr_3b, preferred-register: '' }
+  - { id: 23, class: ppr, preferred-register: '' }
+liveins:
+  - { reg: '$x0', virtual-reg: '%9' }
+  - { reg: '$x1', virtual-reg: '%10' }
+body: |
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+    liveins: $x0, $x1
+
+    %10:gpr64 = COPY $x1
+    %9:gpr64common = COPY $x0
+    %13:gpr64 = RDVLI_XI 2
+    %14:gpr64 = RDVLI_XI 3
+    %15:gpr64sp = ADDVL_XXI %9, 1
+    %12:ppr = PFALSE
+    %16:gpr64all = COPY $xzr
+    %11:gpr64all = COPY %16
+    %2:gpr64sp = COPY %15
+    %17:zpr = DUP_ZI_B 0, 0
+    %20:ppr = PTRUE_B 31
+
+  bb.1:
+    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+
+    %3:gpr64common = PHI %11, %bb.0, %6, %bb.1
+    %4:ppr_3b = PHI %12, %bb.0, %7, %bb.1
+    %5:ppr_3b = PHI %12, %bb.0, %8, %bb.1
+    %22:ppr_3b = PHI %12, %bb.0, %23, %bb.1
+    ST1B %17, %4, %9, %3 :: (store unknown-size, align 1)
+    ST1B %17, %5, %2, %3 :: (store unknown-size, align 1)
+    ST1B %17, %22, %2, %3 :: (store unknown-size, align 1)
+    %18:gpr64 = ADDXrr %3, %13
+    %6:gpr64all = COPY %18
+    %19:gpr64 = ADDXrr %14, %3
+    %7:ppr = WHILELO_PXX_B %18, %10, implicit-def dead $nzcv
+    PTEST_PP %20, %7, implicit-def $nzcv
+    %21:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
+    %8:ppr = WHILELO_PXX_B %19, %10, implicit-def dead $nzcv
+    %23:ppr = WHILELO_PXX_B killed %18, %19, implicit-def dead $nzcv
+    TBNZW killed %21, 0, %bb.1
+    B %bb.2
+
+  bb.2:
+    RET_ReallyLR
+
+...
Index: llvm/test/CodeGen/AArch64/sve-loop-two-whiles5.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-loop-two-whiles5.mir
@@ -0,0 +1,81 @@
+# REQUIRES: asserts
+# RUN: llc -mtriple=aarch64 -mattr=+sve -run-pass=peephole-opt \
+# RUN:   -verify-machineinstrs -debug-only=peephole-opt %s -o - 2> %t | FileCheck %s
+# RUN: cat %t | FileCheck %s --check-prefix=DEBUG
+
+---
+# CHECK: name: foo
+# CHECK-DAG: %7:ppr = WHILELO_PXX_B %18, %10, implicit-def $nzcv
+# CHECK-NEXT: %19:gpr64 = ADDXrr %14, %3
+# CHECK-NEXT: %21:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
+# CHECK-NEXT: %8:ppr = WHILELO_PXX_B killed %19, %10, implicit-def dead $nzcv
+# CHECK-NEXT: TBNZW killed %21, 0, %bb.1
+
+# DEBUG-NOT: Successfully optimized conditional branch!
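+
+# Negative test: the intervening WHILELO (%8) reads %19, which is defined by
+# an ADDXrr that sits between the flag-setting instruction and the WHILELO.
+# Hoisting the WHILELO would move it above the definition of its own input,
+# so the transformation should bail out.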
+name: foo
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64, preferred-register: '' }
+  - { id: 1, class: gpr64, preferred-register: '' }
+  - { id: 2, class: gpr64sp, preferred-register: '' }
+  - { id: 3, class: gpr64common, preferred-register: '' }
+  - { id: 4, class: ppr_3b, preferred-register: '' }
+  - { id: 5, class: ppr_3b, preferred-register: '' }
+  - { id: 6, class: gpr64all, preferred-register: '' }
+  - { id: 7, class: ppr, preferred-register: '' }
+  - { id: 8, class: ppr, preferred-register: '' }
+  - { id: 9, class: gpr64common, preferred-register: '' }
+  - { id: 10, class: gpr64, preferred-register: '' }
+  - { id: 11, class: gpr64all, preferred-register: '' }
+  - { id: 12, class: ppr, preferred-register: '' }
+  - { id: 13, class: gpr64, preferred-register: '' }
+  - { id: 14, class: gpr64, preferred-register: '' }
+  - { id: 15, class: gpr64sp, preferred-register: '' }
+  - { id: 16, class: gpr64all, preferred-register: '' }
+  - { id: 17, class: zpr, preferred-register: '' }
+  - { id: 18, class: gpr64, preferred-register: '' }
+  - { id: 19, class: gpr64, preferred-register: '' }
+  - { id: 20, class: ppr, preferred-register: '' }
+  - { id: 21, class: gpr32, preferred-register: '' }
+liveins:
+  - { reg: '$x0', virtual-reg: '%9' }
+  - { reg: '$x1', virtual-reg: '%10' }
+body: |
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+    liveins: $x0, $x1
+
+    %10:gpr64 = COPY $x1
+    %9:gpr64common = COPY $x0
+    %13:gpr64 = RDVLI_XI 2
+    %14:gpr64 = RDVLI_XI 3
+    %15:gpr64sp = ADDVL_XXI %9, 1
+    %12:ppr = PFALSE
+    %16:gpr64all = COPY $xzr
+    %11:gpr64all = COPY %16
+    %2:gpr64sp = COPY %15
+    %17:zpr = DUP_ZI_B 0, 0
+    %20:ppr = PTRUE_B 31
+
+  bb.1:
+    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+
+    %3:gpr64common = PHI %11, %bb.0, %6, %bb.1
+    %4:ppr_3b = PHI %12, %bb.0, %7, %bb.1
+    %5:ppr_3b = PHI %12, %bb.0, %8, %bb.1
+    ST1B %17, %4, %9, %3 :: (store unknown-size, align 1)
+    ST1B %17, %5, %2, %3 :: (store unknown-size, align 1)
+    %18:gpr64 = ADDXrr %3, %13
+    %6:gpr64all = COPY %18
+    %7:ppr = WHILELO_PXX_B %18, %10, implicit-def dead $nzcv
+    PTEST_PP %20, %7, implicit-def $nzcv
+    %19:gpr64 = ADDXrr %14, %3
+    %21:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
+    %8:ppr = WHILELO_PXX_B killed %19, %10, implicit-def dead $nzcv
+    TBNZW killed %21, 0, %bb.1
+    B %bb.2
+
+  bb.2:
+    RET_ReallyLR
+
+...