diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -953,6 +953,42 @@ (!cast<Instruction>(NAME # _S) PPRAny:$pred, $Rn)>; def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv2i1 PPRAny:$pred), (nxv2i1 PPRAny:$pred)))), (!cast<Instruction>(NAME # _D) PPRAny:$pred, $Rn)>; + + // combine_op(x, trunc(cntp(all_active, p))) ==> inst p, x + def : Pat<(i32 (combine_op GPR32:$Rn, (trunc (int_aarch64_sve_cntp_oneuse (nxv16i1 (SVEAllActive)), (nxv16i1 PPRAny:$pred))))), + (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME # _B) PPRAny:$pred, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32)), + sub_32))>; + def : Pat<(i32 (combine_op GPR32:$Rn, (trunc (int_aarch64_sve_cntp_oneuse (nxv8i1 (SVEAllActive)), (nxv8i1 PPRAny:$pred))))), + (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME # _H) PPRAny:$pred, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32)), + sub_32))>; + def : Pat<(i32 (combine_op GPR32:$Rn, (trunc (int_aarch64_sve_cntp_oneuse (nxv4i1 (SVEAllActive)), (nxv4i1 PPRAny:$pred))))), + (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME # _S) PPRAny:$pred, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32)), + sub_32))>; + def : Pat<(i32 (combine_op GPR32:$Rn, (trunc (int_aarch64_sve_cntp_oneuse (nxv2i1 (SVEAllActive)), (nxv2i1 PPRAny:$pred))))), + (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME # _D) PPRAny:$pred, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32)), + sub_32))>; + + // combine_op(x, trunc(cntp(p, p))) ==> inst p, x + def : Pat<(i32 (combine_op GPR32:$Rn, (trunc (int_aarch64_sve_cntp_oneuse (nxv16i1 PPRAny:$pred), (nxv16i1 PPRAny:$pred))))), + (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME # _B) PPRAny:$pred, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32)), + sub_32))>; + def : Pat<(i32 (combine_op GPR32:$Rn, (trunc (int_aarch64_sve_cntp_oneuse (nxv8i1 PPRAny:$pred), (nxv8i1 PPRAny:$pred))))), + (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME # _H) PPRAny:$pred, +
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32)), + sub_32))>; + def : Pat<(i32 (combine_op GPR32:$Rn, (trunc (int_aarch64_sve_cntp_oneuse (nxv4i1 PPRAny:$pred), (nxv4i1 PPRAny:$pred))))), + (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME # _S) PPRAny:$pred, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32)), + sub_32))>; + def : Pat<(i32 (combine_op GPR32:$Rn, (trunc (int_aarch64_sve_cntp_oneuse (nxv2i1 PPRAny:$pred), (nxv2i1 PPRAny:$pred))))), + (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME # _D) PPRAny:$pred, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32)), + sub_32))>; } class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm, diff --git a/llvm/test/CodeGen/AArch64/sve-cntp-combine-i32.ll b/llvm/test/CodeGen/AArch64/sve-cntp-combine-i32.ll --- a/llvm/test/CodeGen/AArch64/sve-cntp-combine-i32.ll +++ b/llvm/test/CodeGen/AArch64/sve-cntp-combine-i32.ll @@ -8,9 +8,9 @@ define i32 @cntp_add_all_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 { ; CHECK-LABEL: cntp_add_all_active_nxv16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: cntp x8, p1, p0.b -; CHECK-NEXT: add w0, w8, w0 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: incp x0, p0.b +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %pg) @@ -22,9 +22,9 @@ define i32 @cntp_add_all_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 { ; CHECK-LABEL: cntp_add_all_active_nxv8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: cntp x8, p1, p0.h -; CHECK-NEXT: add w0, w8, w0 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: incp x0, p0.h +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %pg) @@ -36,9 +36,9 @@ define i32 @cntp_add_all_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 { ; CHECK-LABEL: cntp_add_all_active_nxv4i1: ; CHECK: // %bb.0: -; CHECK-NEXT:
ptrue p1.s -; CHECK-NEXT: cntp x8, p1, p0.s -; CHECK-NEXT: add w0, w8, w0 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: incp x0, p0.s +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %pg) @@ -50,9 +50,9 @@ define i32 @cntp_add_all_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 { ; CHECK-LABEL: cntp_add_all_active_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: cntp x8, p1, p0.d -; CHECK-NEXT: add w0, w8, w0 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: incp x0, p0.d +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg) @@ -64,9 +64,9 @@ define i32 @cntp_add_all_active_nxv8i1_via_cast(i32 %x, <vscale x 8 x i1> %pg) #0 { ; CHECK-LABEL: cntp_add_all_active_nxv8i1_via_cast: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: cntp x8, p1, p0.h -; CHECK-NEXT: add w0, w8, w0 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: incp x0, p0.h +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1) @@ -97,8 +97,9 @@ define i32 @cntp_add_same_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 { ; CHECK-LABEL: cntp_add_same_active_nxv16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: cntp x8, p0, p0.b -; CHECK-NEXT: add w0, w8, w0 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: incp x0, p0.b +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg) %2 = trunc i64 %1 to i32 @@ -109,8 +110,9 @@ define i32 @cntp_add_same_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 { ; CHECK-LABEL: cntp_add_same_active_nxv8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: cntp x8, p0, p0.h -; CHECK-NEXT: add w0, w8, w0 +;
CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: incp x0, p0.h +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg) %2 = trunc i64 %1 to i32 @@ -121,8 +123,9 @@ define i32 @cntp_add_same_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 { ; CHECK-LABEL: cntp_add_same_active_nxv4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: cntp x8, p0, p0.s -; CHECK-NEXT: add w0, w8, w0 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: incp x0, p0.s +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg) %2 = trunc i64 %1 to i32 @@ -133,8 +136,9 @@ define i32 @cntp_add_same_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 { ; CHECK-LABEL: cntp_add_same_active_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: cntp x8, p0, p0.d -; CHECK-NEXT: add w0, w8, w0 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: incp x0, p0.d +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg) %2 = trunc i64 %1 to i32 @@ -163,9 +167,9 @@ define i32 @cntp_sub_all_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 { ; CHECK-LABEL: cntp_sub_all_active_nxv16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: cntp x8, p1, p0.b -; CHECK-NEXT: sub w0, w0, w8 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: decp x0, p0.b +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %pg) @@ -177,9 +181,9 @@ define i32 @cntp_sub_all_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 { ; CHECK-LABEL: cntp_sub_all_active_nxv8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: cntp x8, p1, p0.h -; CHECK-NEXT: sub w0, w0, w8 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: decp x0, p0.h +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %1 =
tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %pg) @@ -191,9 +195,9 @@ define i32 @cntp_sub_all_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 { ; CHECK-LABEL: cntp_sub_all_active_nxv4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: cntp x8, p1, p0.s -; CHECK-NEXT: sub w0, w0, w8 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: decp x0, p0.s +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %pg) @@ -205,9 +209,9 @@ define i32 @cntp_sub_all_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 { ; CHECK-LABEL: cntp_sub_all_active_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: cntp x8, p1, p0.d -; CHECK-NEXT: sub w0, w0, w8 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: decp x0, p0.d +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg) @@ -219,9 +223,9 @@ define i32 @cntp_sub_all_active_nxv8i1_via_cast(i32 %x, <vscale x 8 x i1> %pg) #0 { ; CHECK-LABEL: cntp_sub_all_active_nxv8i1_via_cast: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: cntp x8, p1, p0.h -; CHECK-NEXT: sub w0, w0, w8 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: decp x0, p0.h +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1) @@ -252,8 +256,9 @@ define i32 @cntp_sub_same_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 { ; CHECK-LABEL: cntp_sub_same_active_nxv16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: cntp x8, p0, p0.b -; CHECK-NEXT: sub w0, w0, w8 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: decp x0, p0.b +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %1 = tail call i64
@llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg) %2 = trunc i64 %1 to i32 @@ -264,8 +269,9 @@ define i32 @cntp_sub_same_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 { ; CHECK-LABEL: cntp_sub_same_active_nxv8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: cntp x8, p0, p0.h -; CHECK-NEXT: sub w0, w0, w8 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: decp x0, p0.h +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg) %2 = trunc i64 %1 to i32 @@ -276,8 +282,9 @@ define i32 @cntp_sub_same_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 { ; CHECK-LABEL: cntp_sub_same_active_nxv4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: cntp x8, p0, p0.s -; CHECK-NEXT: sub w0, w0, w8 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: decp x0, p0.s +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg) %2 = trunc i64 %1 to i32 @@ -288,8 +295,9 @@ define i32 @cntp_sub_same_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 { ; CHECK-LABEL: cntp_sub_same_active_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: cntp x8, p0, p0.d -; CHECK-NEXT: sub w0, w0, w8 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: decp x0, p0.d +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg) %2 = trunc i64 %1 to i32