diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1308,8 +1308,9 @@ bool PredIsWhileLike = isWhileOpcode(PredOpcode); if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) { - // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't - // deactivate any lanes OTHER_INST might set. + // For PTEST(PTRUE_ALL, WHILE), if the element size matches the PTEST is + // redundant since WHILE performs an implicit PTEST with an all active + // mask. uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode); uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode); @@ -1318,6 +1319,15 @@ (Mask->getOperand(1).getImm() != 31)) return false; + // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the + // PTEST_LIKE instruction uses the same all active mask and the element + // size matches. + if (PredIsPTestLike) { + auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); + if (Mask != PTestLikeMask) + return false; + } + // Fallthough to simply remove the PTEST. } else if (PredIsPTestLike) { // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll @@ -37,7 +37,9 @@ define i32 @cmpeq_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmpeq_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpeq.nxv16i8( %pg, %a, zeroinitializer) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir @@ -170,7 +170,7 @@ liveins: $p0, $z0 ; CHECK-LABEL: name: cmpeq_imm_nxv16i8 - ; CHECK-NOT: PTEST + ; CHECK: PTEST %1:zpr = COPY $z0 %0:ppr_3b = COPY $p0 %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll @@ -37,7 +37,9 @@ define i32 @cmpge_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmpge_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( %pg, %a, zeroinitializer) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll @@ -37,7 +37,9 @@ define i32 @cmpgt_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmpgt_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmpgt p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpgt.nxv16i8( %pg, %a, zeroinitializer) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll @@ -38,7 +38,9 @@ define i32 @cmphi_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmphi_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmphi p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmphi.nxv16i8( %pg, %a, zeroinitializer) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll @@ -37,7 +37,9 @@ define i32 @cmphs_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmphs_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmphs p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmphs.nxv16i8( %pg, %a, zeroinitializer) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll @@ -8,7 +8,9 @@ define i32 @cmple_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmple_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmple p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( %pg, zeroinitializer, %a) @@ -292,7 +294,9 @@ define i1 @cmp8_ptest_first_ax( %pg, %a, %b) { ; CHECK-LABEL: cmp8_ptest_first_ax: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, mi ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( %pg, %a, %b) @@ -308,7 +312,9 @@ define i1 @cmp8_ptest_last_ax( %pg, %a, %b) { ; CHECK-LABEL: cmp8_ptest_last_ax: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, lo ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( %pg, %a, %b) @@ -323,7 +329,9 @@ define i1 @cmp8_ptest_any_ax( %pg, %a, %b) { ; CHECK-LABEL: cmp8_ptest_any_ax: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( %pg, %a, %b) @@ -339,7 +347,9 @@ define i1 @cmp32_ptest_first_ax( %pg, %a, %b) { ; CHECK-LABEL: cmp32_ptest_first_ax: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, mi ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) @@ -356,7 +366,9 @@ define i1 @cmp32_ptest_last_ax( %pg, %a, %b) { ; CHECK-LABEL: cmp32_ptest_last_ax: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, lo ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) @@ -372,7 +384,9 @@ define i1 @cmp32_ptest_any_ax( %pg, %a, %b) { ; CHECK-LABEL: cmp32_ptest_any_ax: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll @@ -8,7 +8,9 @@ define i32 @cmplo_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmplo_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmplo p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmphi.nxv16i8( %pg, zeroinitializer, %a) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll @@ -8,7 +8,9 @@ define i32 @cmpls_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmpls_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmpls p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmphs.nxv16i8( %pg, zeroinitializer, %a) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll @@ -8,7 +8,9 @@ define i32 @cmplt_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmplt_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmplt p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpgt.nxv16i8( %pg, zeroinitializer, %a) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll @@ -37,7 +37,9 @@ define i32 @cmpne_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmpne_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpne.nxv16i8( %pg, %a, zeroinitializer) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-match.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-match.ll --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-match.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-match.ll @@ -20,7 +20,9 @@ define i32 @match_imm_nxv16i8( %pg, %a, %b) { ; CHECK-LABEL: match_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.match.nxv16i8( %pg, %a, %b) @@ -49,7 +51,9 @@ define i32 @nmatch_imm_nxv16i8( %pg, %a, %b) { ; CHECK-LABEL: nmatch_imm_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: nmatch p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.nmatch.nxv16i8( %pg, %a, %b)