Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1325,10 +1325,30 @@ // Fallthough to simply remove the PTEST. } else if (PredIsPTestLike) { - // For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both - // instructions use the same predicate. + // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the + // flags are set based on the same mask 'PG', but only if PTEST_LIKE also + // operates on 8-bit predicates like the PTEST. Otherwise, for + // instructions like compare that also support 16/32/64-bit predicates, + // the implicit PTEST performed by the compare could consider fewer lanes + // for these element sizes. + // + // For example, consider + // + // ptrue p0.b ; P0=1111-1111-1111-1111 + // index z0.s, #0, #1 ; Z0=<0,1,2,3> + // index z1.s, #1, #1 ; Z1=<1,2,3,4> + // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001 + // ; ^ last active + // ptest p0, p1.b ; P1=0001-0001-0001-0001 + // ; ^ last active + // + // where the compare generates a canonical all-active 32-bit predicate + // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last + // active flag, whereas the PTEST instruction with the same mask doesn't. auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); - if (Mask != PTestLikeMask) + uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode); + if ((Mask != PTestLikeMask) || + (PredElementSize != AArch64::ElementSizeB)) return false; // Fallthough to simply remove the PTEST. 
Index: llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll +++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll @@ -20,7 +20,8 @@ define i32 @cmpeq_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmpeq_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpeq.nxv4i32( %pg, %a, %b) @@ -65,7 +66,8 @@ define i32 @cmpeq_wide_nxv8i16( %pg, %a, %b) { ; CHECK-LABEL: cmpeq_wide_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) @@ -79,7 +81,8 @@ define i32 @cmpeq_wide_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmpeq_wide_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) Index: llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir =================================================================== --- llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir +++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir @@ -65,7 +65,7 @@ liveins: $p0, $z0, $z1 ; CHECK-LABEL: name: cmpeq_nxv8i16 - ; CHECK-NOT: PTEST + ; CHECK: PTEST %2:zpr = COPY $z1 %1:zpr = COPY $z0 %0:ppr_3b = COPY $p0 @@ -101,7 +101,7 @@ liveins: $p0, $z0, $z1 ; CHECK-LABEL: name: cmpeq_nxv4i32 - ; CHECK-NOT: PTEST + ; CHECK: PTEST %2:zpr = COPY $z1 %1:zpr = COPY $z0 %0:ppr_3b = COPY $p0 @@ -137,7 +137,7 @@ liveins: $p0, $z0, $z1 ; CHECK-LABEL: name: cmpeq_nxv2i64 - ; CHECK-NOT: PTEST + ; CHECK: PTEST %2:zpr = COPY $z1 %1:zpr = 
COPY $z0 %0:ppr_3b = COPY $p0 @@ -204,7 +204,7 @@ liveins: $p0, $z0 ; CHECK-LABEL: name: cmpeq_imm_nxv8i16 - ; CHECK-NOT: PTEST + ; CHECK: PTEST %1:zpr = COPY $z0 %0:ppr_3b = COPY $p0 %2:ppr = CMPEQ_PPzZI_H %0, %1, 0, implicit-def dead $nzcv @@ -237,7 +237,7 @@ liveins: $p0, $z0 ; CHECK-LABEL: name: cmpeq_imm_nxv4i32 - ; CHECK-NOT: PTEST + ; CHECK: PTEST %1:zpr = COPY $z0 %0:ppr_3b = COPY $p0 %2:ppr = CMPEQ_PPzZI_S %0, %1, 0, implicit-def dead $nzcv @@ -270,7 +270,7 @@ liveins: $p0, $z0 ; CHECK-LABEL: name: cmpeq_imm_nxv2i64 - ; CHECK-NOT: PTEST + ; CHECK: PTEST %1:zpr = COPY $z0 %0:ppr_3b = COPY $p0 %2:ppr = CMPEQ_PPzZI_D %0, %1, 0, implicit-def dead $nzcv @@ -339,7 +339,7 @@ liveins: $p0, $z0, $z1 ; CHECK-LABEL: name: cmpeq_wide_nxv8i16 - ; CHECK-NOT: PTEST + ; CHECK: PTEST %2:zpr = COPY $z1 %1:zpr = COPY $z0 %0:ppr_3b = COPY $p0 @@ -375,7 +375,7 @@ liveins: $p0, $z0, $z1 ; CHECK-LABEL: name: cmpeq_wide_nxv4i32 - ; CHECK-NOT: PTEST + ; CHECK: PTEST %2:zpr = COPY $z1 %1:zpr = COPY $z0 %0:ppr_3b = COPY $p0 Index: llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll +++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll @@ -20,7 +20,8 @@ define i32 @cmpge_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmpge_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpge.nxv4i32( %pg, %a, %b) @@ -65,7 +66,8 @@ define i32 @cmpge_wide_nxv8i16( %pg, %a, %b) { ; CHECK-LABEL: cmpge_wide_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: cmpge p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cmpge p1.h, p0/z, z0.h, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) @@ -79,7 +81,8 @@ define i32 @cmpge_wide_nxv4i32( %pg, 
%a, %b) { ; CHECK-LABEL: cmpge_wide_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) Index: llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll +++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll @@ -20,7 +20,8 @@ define i32 @cmpgt_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmpgt_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpgt.nxv4i32( %pg, %a, %b) @@ -65,7 +66,8 @@ define i32 @cmpgt_wide_nxv8i16( %pg, %a, %b) { ; CHECK-LABEL: cmpgt_wide_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: cmpgt p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cmpgt p1.h, p0/z, z0.h, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) @@ -79,7 +81,8 @@ define i32 @cmpgt_wide_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmpgt_wide_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) Index: llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll +++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll @@ -20,7 +20,8 @@ define i32 @cmphi_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmphi_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: cmphi p1.s, p0/z, z0.s, z1.s 
+; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmphi.nxv4i32( %pg, %a, %b) @@ -66,7 +67,8 @@ define i32 @cmphi_wide_nxv8i16( %pg, %a, %b) { ; CHECK-LABEL: cmphi_wide_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: cmphi p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cmphi p1.h, p0/z, z0.h, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) @@ -80,7 +82,8 @@ define i32 @cmphi_wide_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmphi_wide_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cmphi p1.s, p0/z, z0.s, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) Index: llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll +++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll @@ -20,7 +20,8 @@ define i32 @cmphs_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmphs_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: cmphs p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmphs.nxv4i32( %pg, %a, %b) @@ -65,7 +66,8 @@ define i32 @cmphs_wide_nxv8i16( %pg, %a, %b) { ; CHECK-LABEL: cmphs_wide_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: cmphs p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cmphs p1.h, p0/z, z0.h, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) @@ -79,7 +81,8 @@ define i32 @cmphs_wide_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmphs_wide_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cmphs p1.s, p0/z, z0.s, z1.d +; CHECK-NEXT: ptest p0, p1.b ; 
CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) Index: llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll +++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll @@ -37,7 +37,8 @@ define i32 @cmple_wide_nxv8i16( %pg, %a, %b) { ; CHECK-LABEL: cmple_wide_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: cmple p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cmple p1.h, p0/z, z0.h, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) @@ -51,7 +52,8 @@ define i32 @cmple_wide_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmple_wide_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmple p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cmple p1.s, p0/z, z0.s, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) @@ -114,7 +116,8 @@ define i1 @cmp32_ptest_first_px( %pg, %a, %b) { ; CHECK-LABEL: cmp32_ptest_first_px: ; CHECK: // %bb.0: -; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, mi ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) @@ -130,7 +133,8 @@ define i1 @cmp32_ptest_last_px( %pg, %a, %b) { ; CHECK-LABEL: cmp32_ptest_last_px: ; CHECK: // %bb.0: -; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, lo ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) @@ -146,7 +150,8 @@ define i1 @cmp32_ptest_any_px( %pg, %a, %b) { ; CHECK-LABEL: cmp32_ptest_any_px: ; CHECK: // %bb.0: -; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: 
cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) Index: llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll +++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll @@ -37,7 +37,8 @@ define i32 @cmplo_wide_nxv8i16( %pg, %a, %b) { ; CHECK-LABEL: cmplo_wide_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: cmplo p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cmplo p1.h, p0/z, z0.h, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) @@ -51,7 +52,8 @@ define i32 @cmplo_wide_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmplo_wide_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmplo p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cmplo p1.s, p0/z, z0.s, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) Index: llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll +++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll @@ -37,7 +37,8 @@ define i32 @cmpls_wide_nxv8i16( %pg, %a, %b) { ; CHECK-LABEL: cmpls_wide_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: cmpls p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cmpls p1.h, p0/z, z0.h, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) @@ -51,7 +52,8 @@ define i32 @cmpls_wide_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmpls_wide_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmpls p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cmpls p1.s, p0/z, z0.s, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) Index: 
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll +++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll @@ -37,7 +37,8 @@ define i32 @cmplt_wide_nxv8i16( %pg, %a, %b) { ; CHECK-LABEL: cmplt_wide_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: cmplt p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cmplt p1.h, p0/z, z0.h, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) @@ -51,7 +52,8 @@ define i32 @cmplt_wide_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmplt_wide_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmplt p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cmplt p1.s, p0/z, z0.s, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) Index: llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll +++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll @@ -20,7 +20,8 @@ define i32 @cmpne_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmpne_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpne.nxv4i32( %pg, %a, %b) @@ -65,7 +66,8 @@ define i32 @cmpne_wide_nxv8i16( %pg, %a, %b) { ; CHECK-LABEL: cmpne_wide_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) @@ -79,7 +81,8 @@ define i32 @cmpne_wide_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmpne_wide_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmpne p0.s, 
p0/z, z0.s, z1.d +; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, z1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) Index: llvm/test/CodeGen/AArch64/sve-ptest-removal-pfirst-pnext.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-ptest-removal-pfirst-pnext.ll +++ llvm/test/CodeGen/AArch64/sve-ptest-removal-pfirst-pnext.ll @@ -17,6 +17,7 @@ ; CHECK-LABEL: pnext_2: ; CHECK: // %bb.0: ; CHECK-NEXT: pnext p1.d, p0, p1.d +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.pnext.nxv2i1( %pg, %a) @@ -29,6 +30,7 @@ ; CHECK-LABEL: pnext_4: ; CHECK: // %bb.0: ; CHECK-NEXT: pnext p1.s, p0, p1.s +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.pnext.nxv4i1( %pg, %a) @@ -41,6 +43,7 @@ ; CHECK-LABEL: pnext_8: ; CHECK: // %bb.0: ; CHECK-NEXT: pnext p1.h, p0, p1.h +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.pnext.nxv8i1( %pg, %a) Index: llvm/test/CodeGen/AArch64/sve-setcc.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-setcc.ll +++ llvm/test/CodeGen/AArch64/sve-setcc.ll @@ -6,6 +6,7 @@ ; CHECK-LABEL: sve_cmplt_setcc: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmplt p1.h, p0/z, z0.h, #0 +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: b.eq .LBB0_2 ; CHECK-NEXT: // %bb.1: // %if.then ; CHECK-NEXT: st1h { z0.h }, p0, [x0] @@ -29,6 +30,7 @@ ; CHECK-LABEL: sve_cmplt_setcc_inverted: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmplt p1.h, p0/z, z0.h, #0 +; CHECK-NEXT: ptest p0, p1.b ; CHECK-NEXT: b.ne .LBB1_2 ; CHECK-NEXT: // %bb.1: // %if.then ; CHECK-NEXT: st1h { z0.h }, p0, [x0]