Index: llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
+++ llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
@@ -13,7 +13,7 @@
 ; CHECK-NEXT: ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
   %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %3 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
+  %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
   %conv = zext i1 %3 to i32
   ret i32 %conv
 }
@@ -29,7 +29,7 @@
 ; CHECK-NEXT: cset w0, ne
 ; CHECK-NEXT: ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
-  %2 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+  %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
   %conv = zext i1 %2 to i32
   ret i32 %conv
 }
@@ -43,7 +43,7 @@
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
   %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
   %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
-  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
   %conv = zext i1 %4 to i32
   ret i32 %conv
 }
@@ -57,19 +57,436 @@
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
   %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
   %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
-  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
   %conv = zext i1 %4 to i32
   ret i32 %conv
 }
+
+; ==============================================================================
+; PTEST_OP(PG, CMP(PG, ...))
+; ==============================================================================
+
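+; Note on the CHECK lines throughout: cset materialises the base condition
+; behind each SVE condition alias, i.e. FIRST is MI (N flag set), LAST is LO
+; (C flag clear) and ANY is NE (Z flag clear).
+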
+;
+; PTEST_FIRST(PG, CMP8(PG, A, B)). PTEST is redundant.
+;
+define i1 @cmp8_ptest_first_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_first_px:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  %2 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+  ret i1 %2
+}
+
+;
+; PTEST_LAST(PG, CMP8(PG, A, B)). PTEST is redundant.
+;
+define i1 @cmp8_ptest_last_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_last_px:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  %2 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+  ret i1 %2
+}
+
+;
+; PTEST_ANY(PG, CMP8(PG, A, B)). PTEST is redundant.
+;
+define i1 @cmp8_ptest_any_px(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_any_px:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %1)
+  ret i1 %2
+}
+
+;
+; PTEST_FIRST(PG, CMP32(PG, A, B)). Can't remove PTEST since PTEST.B vs CMP.S.
+;
+define i1 @cmp32_ptest_first_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_first_px:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  ret i1 %4
+}
+
+;
+; PTEST_LAST(PG, CMP32(PG, A, B)). Can't remove PTEST since PTEST.B vs CMP.S.
+;
+define i1 @cmp32_ptest_last_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_last_px:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  ret i1 %4
+}
+
+;
+; PTEST_ANY(PG, CMP32(PG, A, B)). PTEST is redundant.
+;
+define i1 @cmp32_ptest_any_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_any_px:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  ret i1 %4
+}
+
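+; Note: in the CMP32 cases above the compare performs its implicit PTEST at .S
+; granularity, while the explicit PTEST uses a .B mask. The compare result only
+; sets bits at .S element positions, so ANY (Z) is unaffected, but FIRST (N)
+; and LAST (C) depend on exactly which mask bits are active and can differ.
+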
+; ==============================================================================
+; PTEST_OP(X=CMP(PG, ...), X)
+; ==============================================================================
+
+;
+; PTEST_FIRST(X=CMP8(PG, A, B), X). PTEST is redundant if condition is changed
+; to any.
+;
+; Can't remove PTEST and keep the same condition (first), since the mask for
+; the implicit PTEST (PG) performed by the compare differs from the mask
+; specified to the explicit PTEST and could have a different result.
+;
+; For example, consider
+;
+;   PG=<1, 1, x, x>
+;   Z0=<1, 2, x, x>
+;   Z1=<2, 1, x, x>
+;
+;   X=CMPGE(PG, Z0, Z1)
+;    =<0, 1, x, x> NZCV=0xxx
+;   PTEST(X, X), NZCV=1xxx
+;
+; where the first active flag (bit 'N' in NZCV) is set by the explicit PTEST,
+; but not by the implicit PTEST as part of the compare. However, given the
+; PTEST mask and source are the same, first is equivalent to any. The same
+; applies to last active.
+;
+define i1 @cmp8_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_first_xx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  %2 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+  ret i1 %2
+}
+
+;
+; PTEST_LAST(X=CMP8(PG, A, B), X). PTEST is redundant if condition is changed
+; to any.
+;
+define i1 @cmp8_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_last_xx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  %2 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+  ret i1 %2
+}
+
+;
+; PTEST_ANY(X=CMP8(PG, A, B), X). PTEST is redundant.
+;
+define i1 @cmp8_ptest_any_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_any_xx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+  ret i1 %2
+}
+
+;
+; PTEST_FIRST(X=CMP32(PG, A, B), X). PTEST is redundant if condition is changed
+; to any.
+;
+define i1 @cmp32_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_first_xx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  %3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %2, <vscale x 4 x i1> %2)
+  ret i1 %3
+}
+
+;
+; PTEST_LAST(X=CMP32(PG, A, B), X). PTEST is redundant if condition is changed
+; to any.
+;
+define i1 @cmp32_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_last_xx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  %3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1> %2, <vscale x 4 x i1> %2)
+  ret i1 %3
+}
+
+;
+; PTEST_ANY(X=CMP32(PG, A, B), X). PTEST is redundant.
+;
+define i1 @cmp32_ptest_any_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_any_xx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %2, <vscale x 4 x i1> %2)
+  ret i1 %3
+}
+
+; ==============================================================================
+; PTEST_OP(PTRUE_ALL, CMP(PG, ...))
+; ==============================================================================
+
+;
+; PTEST_FIRST(PTRUE_ALL, CMP8(PG, A, B)). Can't remove PTEST since mask is
+; different.
+;
+define i1 @cmp8_ptest_first_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_first_ax:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
+  ret i1 %3
+}
+
+;
+; PTEST_LAST(PTRUE_ALL, CMP8(PG, A, B)). Can't remove PTEST since mask is
+; different.
+;
+define i1 @cmp8_ptest_last_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_last_ax:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
+  ret i1 %3
+}
+
+;
+; PTEST_ANY(PTRUE_ALL, CMP8(PG, A, B)). PTEST is redundant.
+;
+define i1 @cmp8_ptest_any_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_any_ax:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %2, <vscale x 16 x i1> %1)
+  ret i1 %3
+}
+
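+; Note: PTRUE_ALL is a superset of any governing predicate PG and the compare
+; result is zero in lanes where PG is false, so ANY over PTRUE_ALL is the same
+; as ANY over PG and the PTEST folds; FIRST/LAST still depend on which mask
+; lanes are active and do not.
+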
+;
+; PTEST_FIRST(PTRUE_ALL, CMP32(PG, A, B)). Can't remove PTEST since mask is
+; different.
+;
+define i1 @cmp32_ptest_first_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_first_ax:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %3, <vscale x 4 x i1> %2)
+  ret i1 %4
+}
+
+;
+; PTEST_LAST(PTRUE_ALL, CMP32(PG, A, B)). Can't remove PTEST since mask is
+; different.
+;
+define i1 @cmp32_ptest_last_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_last_ax:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1> %3, <vscale x 4 x i1> %2)
+  ret i1 %4
+}
+
+;
+; PTEST_ANY(PTRUE_ALL, CMP32(PG, A, B)). PTEST is redundant.
+;
+define i1 @cmp32_ptest_any_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_any_ax:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %3, <vscale x 4 x i1> %2)
+  ret i1 %4
+}
+
+; ==============================================================================
+; PTEST_OP(PTRUE_ALL, CMP(PTRUE_ALL, ...))
+; ==============================================================================
+
+;
+; PTEST_FIRST(PTRUE_ALL, CMP8(PTRUE_ALL, A, B)). PTEST is redundant.
+;
+define i1 @cmp8_ptest_first_aa(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_first_aa:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  %3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
+  ret i1 %3
+}
+
+;
+; PTEST_LAST(PTRUE_ALL, CMP8(PTRUE_ALL, A, B)). PTEST is redundant.
+;
+define i1 @cmp8_ptest_last_aa(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_last_aa:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  %3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
+  ret i1 %3
+}
+
+;
+; PTEST_ANY(PTRUE_ALL, CMP8(PTRUE_ALL, A, B)). PTEST is redundant.
+;
+define i1 @cmp8_ptest_any_aa(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmp8_ptest_any_aa:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
+  ret i1 %3
+}
+
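+; Note: when the PTEST mask is the same all-true predicate that governed the
+; compare, the compare's implicit PTEST is identical to the explicit one, so
+; FIRST and LAST fold as well as ANY.
+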
+;
+; PTEST_FIRST(PTRUE_ALL, CMP32(PTRUE_ALL, A, B)). PTEST is redundant.
+;
+define i1 @cmp32_ptest_first_aa(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_first_aa:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  %3 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %2)
+  ret i1 %3
+}
+
+;
+; PTEST_LAST(PTRUE_ALL, CMP32(PTRUE_ALL, A, B)). PTEST is redundant.
+;
+define i1 @cmp32_ptest_last_aa(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_last_aa:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  %3 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %2)
+  ret i1 %3
+}
+
+;
+; PTEST_ANY(PTRUE_ALL, CMP32(PTRUE_ALL, A, B)). PTEST is redundant.
+;
+define i1 @cmp32_ptest_any_aa(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmp32_ptest_any_aa:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %2)
+  ret i1 %3
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
-declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
+
+declare i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
+declare i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
+declare i1 @llvm.aarch64.sve.ptest.last.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
 
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
 
 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)