diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1304,6 +1304,22 @@ const MachineRegisterInfo *MRI) const { auto *Mask = MRI->getUniqueVRegDef(MaskReg); auto *Pred = MRI->getUniqueVRegDef(PredReg); + + // If the predicate is in a different block (possibly because it's been + // hoisted out), then assume the flags are set in between statements. + if (Pred->getParent() != PTest->getParent()) + return false; + + const TargetRegisterInfo *TRI = &getRegisterInfo(); + + // If another instruction between the predicate op and the ptest sets the + // flags, don't remove the ptest. + MachineBasicBlock::iterator I = Pred, E = PTest; + ++I; // Skip past the predicate op itself. + for (; I != E; ++I) + if (I->modifiesRegister(AArch64::NZCV, TRI)) + return false; + auto NewOp = Pred->getOpcode(); bool OpChanged = false; @@ -1366,30 +1382,25 @@ OpChanged = true; break; } + case AArch64::RDFFR_PPz: { + // rdffr p1.b, PredMask=p0/z <--- Definition of Pred + // ptest Mask=p0, Pred=p1.b <--- If equal masks, remove this and use + // `rdffrs p1.b, p0/z` above. + MachineInstr *RDFFR = Pred; + auto *RDFFRMask = MRI->getUniqueVRegDef(RDFFR->getOperand(1).getReg()); + if (Mask != RDFFRMask) + return false; + + NewOp = AArch64::RDFFRS_PPz; + OpChanged = true; + break; + } default: // Bail out if we don't recognize the input return false; } } - const TargetRegisterInfo *TRI = &getRegisterInfo(); - - // If the predicate is in a different block (possibly because its been - // hoisted out), then assume the flags are set in between statements. - if (Pred->getParent() != PTest->getParent()) - return false; - - // If another instruction between the propagation and test sets the - // flags, don't remove the ptest. - MachineBasicBlock::iterator I = Pred, E = PTest; - ++I; // Skip past the predicate op itself. 
- for (; I != E; ++I) { - const MachineInstr &Inst = *I; - - // TODO: If the ptest flags are unused, we could still remove it. - if (Inst.modifiesRegister(AArch64::NZCV, TRI)) - return false; - } // If we pass all the checks, it's safe to remove the PTEST and use the flags // as they are prior to PTEST. Sometimes this requires the tested PTEST diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-rdffr.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-rdffr.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-rdffr.mir @@ -0,0 +1,65 @@ +# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s +# Test that RDFFR followed by a PTEST using the same mask is replaced with RDFFRS, and that the PTEST is kept when the masks differ or nzcv is clobbered in between. +--- +# CHECK-LABEL: name:{{\s*}} substitute_rdffr_pp_with_rdffrs_pp +name: substitute_rdffr_pp_with_rdffrs_pp +tracksRegLiveness: true +body: | + bb.0: + liveins: $ffr, $p0 + %0:ppr_3b = COPY $p0 + + ; CHECK: RDFFRS_PPz + ; CHECK-NOT: PTEST + %1:ppr_3b = RDFFR_PPz %0:ppr_3b + PTEST_PP killed %0:ppr_3b, killed %1:ppr_3b, implicit-def $nzcv + + ; Consume nzcv + %2:gpr32 = COPY $wzr + %3:gpr32 = CSINCWr killed %2, $wzr, 0, implicit $nzcv + $w0 = COPY %3 + RET_ReallyLR implicit $w0 +... +--- +# CHECK-LABEL: name:{{\s*}} fail_to_substitute_rdffr_pp_with_rdffrs_pp_differing_mask +name: fail_to_substitute_rdffr_pp_with_rdffrs_pp_differing_mask +tracksRegLiveness: true +body: | + bb.0: + liveins: $ffr, $p0, $p1 + %0:ppr_3b = COPY $p0 + %1:ppr_3b = COPY $p1 + + ; CHECK: RDFFR_PPz + ; CHECK: PTEST + %2:ppr_3b = RDFFR_PPz %0:ppr_3b + PTEST_PP killed %1:ppr_3b, killed %2:ppr_3b, implicit-def $nzcv + + ; Consume nzcv + %3:gpr32 = COPY $wzr + %4:gpr32 = CSINCWr killed %3, $wzr, 0, implicit $nzcv + $w0 = COPY %4 + RET_ReallyLR implicit $w0 +... 
+--- +# CHECK-LABEL: name:{{\s*}} fail_to_substitute_rdffr_pp_with_rdffrs_pp_nzcv_clobbered +name: fail_to_substitute_rdffr_pp_with_rdffrs_pp_nzcv_clobbered +tracksRegLiveness: true +body: | + bb.0: + liveins: $ffr, $p0, $x0 + %0:ppr_3b = COPY $p0 + + ; CHECK: RDFFR_PPz + ; CHECK-NEXT: ADDSXrr + ; CHECK-NEXT: PTEST_PP + %1:ppr_3b = RDFFR_PPz %0:ppr_3b + ; Clobber nzcv between the RDFFR and the PTEST (ADDSXrr defines $nzcv) + $x0 = ADDSXrr $x0, $x0, implicit-def $nzcv + PTEST_PP killed %0:ppr_3b, killed %1:ppr_3b, implicit-def $nzcv + + ; Consume nzcv (CSINCWr reads $nzcv) + %2:gpr32 = COPY $wzr + %3:gpr32 = CSINCWr killed %2, $wzr, 0, implicit $nzcv + $w0 = COPY %3 + RET_ReallyLR implicit $w0