diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1301,7 +1301,7 @@ setCondCodeAction(ISD::SETUGE, VT, Expand); setCondCodeAction(ISD::SETUGT, VT, Expand); setCondCodeAction(ISD::SETUEQ, VT, Expand); - setCondCodeAction(ISD::SETUNE, VT, Expand); + setCondCodeAction(ISD::SETONE, VT, Expand); } for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) { @@ -1556,7 +1556,7 @@ setCondCodeAction(ISD::SETUGE, VT, Expand); setCondCodeAction(ISD::SETUGT, VT, Expand); setCondCodeAction(ISD::SETUEQ, VT, Expand); - setCondCodeAction(ISD::SETUNE, VT, Expand); + setCondCodeAction(ISD::SETONE, VT, Expand); } // Mark integer truncating stores/extending loads as having custom lowering diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1493,7 +1493,7 @@ defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", SETOGE, SETGE, SETOLE, SETLE>; defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", SETOGT, SETGT, SETOLT, SETLT>; defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>; - defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", SETONE, SETNE, SETONE, SETNE>; + defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", SETUNE, SETNE, SETUNE, SETNE>; defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", SETUO, SETUO, SETUO, SETUO>; defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>; defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>; @@ -1503,7 +1503,7 @@ defm FCMLT_PPzZ0 : sve_fp_2op_p_pd<0b010, "fcmlt", SETOLT, SETLT, SETOGT, SETGT>; defm FCMLE_PPzZ0 : sve_fp_2op_p_pd<0b011, "fcmle", SETOLE, SETLE, SETOGE, SETGE>; defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>; - defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne", SETONE, SETNE, SETONE, SETNE>; + defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne", SETUNE, SETNE, SETUNE, SETNE>; defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt>; defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele>; diff --git a/llvm/test/CodeGen/AArch64/sve-fcmp.ll b/llvm/test/CodeGen/AArch64/sve-fcmp.ll --- a/llvm/test/CodeGen/AArch64/sve-fcmp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fcmp.ll @@ -50,7 +50,9 @@ ; CHECK-LABEL: one: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z0.s +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b ; CHECK-NEXT: ret %y = fcmp one %x, %x2 ret %y @@ -69,8 +71,9 @@ ; CHECK-LABEL: ueq: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, z1.s -; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b ; CHECK-NEXT: ret %y = fcmp ueq %x, %x2 ret %y @@ -119,8 +122,7 @@ ; CHECK-LABEL: une: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s -; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s ; CHECK-NEXT: ret %y = fcmp une %x, %x2 ret %y @@ -147,8 +149,9 @@ ; CHECK-LABEL: ueq_2f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, z1.s -; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b ; CHECK-NEXT: ret %y = fcmp ueq %x, %x2 ret %y @@ -166,8 +169,9 @@ ; CHECK-LABEL: ueq_2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: fcmne p1.d, p0/z, z0.d, z1.d -; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p1.d, p0/z, z0.d, z1.d +; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, z1.d +; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b ; CHECK-NEXT: ret %y = fcmp ueq %x, %x2 ret %y @@ -185,8 +189,9 @@ ; CHECK-LABEL: ueq_2f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, z1.h +; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b ; CHECK-NEXT: ret %y = fcmp ueq %x, %x2 ret %y @@ -204,8 +209,9 @@ ; CHECK-LABEL: ueq_4f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, z1.h +; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b ; CHECK-NEXT: ret %y = fcmp ueq %x, %x2 ret %y @@ -223,8 +229,9 @@ ; CHECK-LABEL: ueq_8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, z1.h +; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b ; CHECK-NEXT: ret %y = fcmp ueq %x, %x2 ret %y @@ -357,7 +364,9 @@ ; CHECK-LABEL: one_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: fcmlt p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b ; CHECK-NEXT: ret %y = fcmp one %x, zeroinitializer ret %y @@ -365,9 +374,11 @@ define @ueq_zero( %x) { ; CHECK-LABEL: ueq_zero: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #0 // =0x0 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b ; CHECK-NEXT: ret %y = fcmp ueq %x, zeroinitializer ret %y @@ -416,8 +427,7 @@ ; CHECK-LABEL: une_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0 ; CHECK-NEXT: ret %y = fcmp une %x, zeroinitializer ret %y diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll @@ -367,10 +367,10 @@ ; CHECK-NEXT: ptrue p0.h, vl16 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] -; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: fcmuo p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: fcmeq p2.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov p1.b, p2/m, p2.b ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: st1h { z0.h }, p0, [x2] ; CHECK-NEXT: ret %op1 = load <16 x half>, <16 x half>* %a @@ -391,7 +391,9 @@ ; CHECK-NEXT: ptrue p0.h, vl16 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] -; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z0.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov p1.b, p2/m, p2.b ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: st1h { z0.h }, p0, [x2] ; CHECK-NEXT: ret @@ -413,10 +415,8 @@ ; CHECK-NEXT: ptrue p0.h, vl16 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] -; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: st1h { z0.h }, p0, [x2] ; CHECK-NEXT: ret %op1 = load <16 x half>, <16 x half>* %a diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll @@ -8,19 +8,16 @@ ; CHECK-NEXT: ptrue p0.s, vl8 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0] -; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: fcmeq p0.s, p0/z, z1.s, #0.0 +; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0 ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z2.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: eor z0.d, z0.d, z1.d -; CHECK-NEXT: eor z1.d, z2.d, z1.d +; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b ; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b -; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: mov v1.d[1], v0.d[0] +; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: orv b0, p0, z1.b ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 @@ -36,12 +33,10 @@ ; CHECK-LABEL: ptest_v16i1_512bit_min_sve: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl16 -; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0 ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b ; CHECK-NEXT: orv b0, p0, z0.b @@ -59,12 +54,10 @@ ; CHECK-LABEL: ptest_v16i1_512bit_sve: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0 ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b ; CHECK-NEXT: orv b0, p0, z0.b @@ -84,15 +77,11 @@ ; CHECK-NEXT: ptrue p0.s, vl16 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] -; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: fcmeq p0.s, p0/z, z1.s, #0.0 -; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z2.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: eor z0.d, z0.d, z1.d -; CHECK-NEXT: eor z1.d, z2.d, z1.d +; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0 +; CHECK-NEXT: mov p0.b, p1/m, p1.b +; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b ; CHECK-NEXT: orv b0, p0, z0.b @@ -122,13 +111,10 @@ ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] -; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: fcmeq p0.s, p0/z, z1.s, #0.0 -; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff -; CHECK-NEXT: eor z0.d, z0.d, z1.d -; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: bic z0.d, z0.d, z1.d +; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: mov z0.s, #0 // =0x0 +; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, z0.s +; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b @@ -153,13 +139,10 @@ ; CHECK-NEXT: ptrue p0.s, vl16 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] -; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: fcmeq p0.s, p0/z, z1.s, #0.0 -; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff -; CHECK-NEXT: eor z0.d, z0.d, z1.d -; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: bic z0.d, z0.d, z1.d +; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0 +; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b +; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b diff --git a/llvm/test/CodeGen/AArch64/sve-select.ll b/llvm/test/CodeGen/AArch64/sve-select.ll --- a/llvm/test/CodeGen/AArch64/sve-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-select.ll @@ -547,8 +547,7 @@ ; CHECK-LABEL: select_f32_invert_fmul: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0 ; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %p = fcmp oeq %a, zeroinitializer @@ -561,8 +560,7 @@ ; CHECK-LABEL: select_f32_invert_fadd: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0 ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %p = fcmp oeq %a, zeroinitializer