Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -447,6 +447,21 @@
   defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", AArch64fabs_mt>;
   defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", AArch64fneg_mt>;
 
+  // zext(cmpeq(x, splat(0))) -> cnot(x)
+  // Only valid when the governing predicate is all active: setcc_z produces
+  // false (so the zext produces 0) for inactive lanes, whereas the merging CNOT
+  // used here would leave x in those lanes.
+  // There is deliberately no floating-point form: CNOT writes the integer 1
+  // (not 1.0) and treats -0.0 as non-zero, unlike uitofp(fcmp oeq x, 0.0).
+  def : Pat<(nxv16i8 (zext (nxv16i1 (AArch64setcc_z (nxv16i1 (SVEAllActive):$Pg), nxv16i8:$Op2, (SVEDup0), SETEQ)))),
+            (CNOT_ZPmZ_B $Op2, $Pg, $Op2)>;
+  def : Pat<(nxv8i16 (zext (nxv8i1 (AArch64setcc_z (nxv8i1 (SVEAllActive):$Pg), nxv8i16:$Op2, (SVEDup0), SETEQ)))),
+            (CNOT_ZPmZ_H $Op2, $Pg, $Op2)>;
+  def : Pat<(nxv4i32 (zext (nxv4i1 (AArch64setcc_z (nxv4i1 (SVEAllActive):$Pg), nxv4i32:$Op2, (SVEDup0), SETEQ)))),
+            (CNOT_ZPmZ_S $Op2, $Pg, $Op2)>;
+  def : Pat<(nxv2i64 (zext (nxv2i1 (AArch64setcc_z (nxv2i1 (SVEAllActive):$Pg), nxv2i64:$Op2, (SVEDup0), SETEQ)))),
+            (CNOT_ZPmZ_D $Op2, $Pg, $Op2)>;
+
   defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", int_aarch64_sve_smax, DestructiveBinaryComm>;
   defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", int_aarch64_sve_umax, DestructiveBinaryComm>;
   defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", "SMIN_ZPZZ", int_aarch64_sve_smin, DestructiveBinaryComm>;
Index: llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
+++ llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
@@ -57,8 +57,7 @@
 ; CHECK-LABEL: icmp_cnot_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT:    mov z0.b, p0/z, #1 // =0x1
+; CHECK-NEXT:    cnot z0.b, p0/m, z0.b
 ; CHECK-NEXT:    ret
   %mask = icmp eq <vscale x 16 x i8> %a, zeroinitializer
   %zext = zext <vscale x 16 x i1> %mask to <vscale x 16 x i8>
@@ -69,8 +68,7 @@
 ; CHECK-LABEL: icmp_cnot_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT:    mov z0.h, p0/z, #1 // =0x1
+; CHECK-NEXT:    cnot z0.h, p0/m, z0.h
 ; CHECK-NEXT:    ret
   %mask = icmp eq <vscale x 8 x i16> %a, zeroinitializer
   %zext = zext <vscale x 8 x i1> %mask to <vscale x 8 x i16>
@@ -81,8 +79,7 @@
 ; CHECK-LABEL: icmp_cnot_nxv4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    cnot z0.s, p0/m, z0.s
 ; CHECK-NEXT:    ret
   %mask = icmp eq <vscale x 4 x i32> %a, zeroinitializer
   %zext = zext <vscale x 4 x i1> %mask to <vscale x 4 x i32>
@@ -93,8 +90,7 @@
 ; CHECK-LABEL: icmp_cnot_nxv2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, #0
-; CHECK-NEXT:    mov z0.d, p0/z, #1 // =0x1
+; CHECK-NEXT:    cnot z0.d, p0/m, z0.d
 ; CHECK-NEXT:    ret
   %mask = icmp eq <vscale x 2 x i64> %a, zeroinitializer
   %zext = zext <vscale x 2 x i1> %mask to <vscale x 2 x i64>