diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1302,6 +1302,8 @@
 bool AArch64InstrInfo::optimizePTestInstr(
     MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
     const MachineRegisterInfo *MRI) const {
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+
   auto *Mask = MRI->getUniqueVRegDef(MaskReg);
   auto *Pred = MRI->getUniqueVRegDef(PredReg);
   auto NewOp = Pred->getOpcode();
@@ -1366,6 +1368,37 @@
     OpChanged = true;
     break;
   }
+  case AArch64::RDFFR_P: {
+    // rdffr p1.b <--- Optimize this to rdffrs p1.b, p0/z
+    //
+    // ptest Mask=p0, Pred=p1.b
+    //
+    // RDFFRS zeroes the lanes of p1 that are inactive in p0, whereas RDFFR
+    // returns the whole FFR. The rewrite is therefore only safe when the
+    // PTEST is the sole (non-debug) user of p1.
+    if (!MRI->hasOneNonDBGUse(PredReg))
+      return false;
+
+    if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
+      return false;
+
+    // If Mask is def'd between the two, the def needs moving up.
+    // Bail out for now.
+    if (any_of(instructionsWithoutDebug(std::next(Pred->getIterator()),
+                                        PTest->getIterator()),
+               [TRI, MaskReg](const MachineInstr &I) {
+                 return I.definesRegister(MaskReg, TRI);
+               }))
+      return false;
+
+    BuildMI(*Pred->getParent(), Pred, Pred->getDebugLoc(),
+            get(AArch64::RDFFRS_PPz))
+        .addDef(PredReg)
+        .addUse(MaskReg);
+    PTest->eraseFromParent();
+    Pred->eraseFromParent();
+    return true;
+  }
   case AArch64::RDFFR_PPz: {
     // rdffr p1.b, PredMask=p0/z <--- Definition of Pred
     // ptest Mask=p0, Pred=p1.b <--- If equal masks, remove this and use
@@ -1384,8 +1417,6 @@
   }
   }
 
-  const TargetRegisterInfo *TRI = &getRegisterInfo();
-
   // If another instruction between Pred and PTest accesses flags, don't remove
   // the ptest or update the earlier instruction to modify them.
   if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-rdffr.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-rdffr.mir
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-rdffr.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-rdffr.mir
@@ -88,3 +88,66 @@
     %4:gpr32 = CSINCWr killed %3, $wzr, 0, implicit $nzcv
     $w0 = ORRWrs %4, %2, 1
     RET_ReallyLR implicit $w0
+...
+---
+# CHECK-LABEL: name:{{\s*}} substitute_rdffr_p_with_rdffrs_pp
+name: substitute_rdffr_p_with_rdffrs_pp
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $ffr, $p0
+    %0:ppr_3b = COPY $p0
+
+    ; CHECK: RDFFRS_PP
+    ; CHECK-NOT: PTEST
+    %1:ppr_3b = RDFFR_P
+    PTEST_PP killed %0:ppr_3b, killed %1:ppr_3b, implicit-def $nzcv
+
+    ; Consume nzcv
+    %2:gpr32 = COPY $wzr
+    %3:gpr32 = CSINCWr killed %2, $wzr, 0, implicit $nzcv
+    $w0 = COPY %3
+    RET_ReallyLR implicit $w0
+
+...
+---
+# CHECK-LABEL: name:{{\s*}} fail_to_substitute_rdffr_p_with_rdffrs_pp_def_between
+name: fail_to_substitute_rdffr_p_with_rdffrs_pp_def_between
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $ffr, $p0
+
+    ; CHECK-NOT: RDFFRS_PP
+    ; CHECK: PTEST
+    %0:ppr_3b = RDFFR_P
+    %1:ppr_3b = COPY $p0
+    PTEST_PP killed %1:ppr_3b, killed %0:ppr_3b, implicit-def $nzcv
+
+    ; Consume nzcv
+    %2:gpr32 = COPY $wzr
+    %3:gpr32 = CSINCWr killed %2, $wzr, 0, implicit $nzcv
+    $w0 = COPY %3
+    RET_ReallyLR implicit $w0
+
+...
+---
+# CHECK-LABEL: name:{{\s*}} fail_to_substitute_rdffr_p_with_rdffrs_pp_multiple_uses
+name: fail_to_substitute_rdffr_p_with_rdffrs_pp_multiple_uses
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $ffr, $p0
+    %0:ppr_3b = COPY $p0
+
+    ; CHECK-NOT: RDFFRS_PP
+    ; CHECK: PTEST
+    %1:ppr_3b = RDFFR_P
+    PTEST_PP killed %0:ppr_3b, %1:ppr_3b, implicit-def $nzcv
+
+    ; Consume nzcv and keep the unmasked FFR value live past the PTEST
+    %2:gpr32 = COPY $wzr
+    %3:gpr32 = CSINCWr killed %2, $wzr, 0, implicit $nzcv
+    $w0 = COPY %3
+    $p0 = COPY %1
+    RET_ReallyLR implicit $w0, implicit $p0