diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -14401,29 +14401,34 @@ N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), DAG.getCondCode(ISD::SETUGT)); break; + case Intrinsic::aarch64_sve_fcmpge: case Intrinsic::aarch64_sve_cmpge: - if (!N->getOperand(2).getValueType().isFloatingPoint()) - return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N), - N->getValueType(0), N->getOperand(1), N->getOperand(2), - N->getOperand(3), DAG.getCondCode(ISD::SETGE)); + return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N), + N->getValueType(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), DAG.getCondCode(ISD::SETGE)); break; + case Intrinsic::aarch64_sve_fcmpgt: case Intrinsic::aarch64_sve_cmpgt: - if (!N->getOperand(2).getValueType().isFloatingPoint()) - return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N), - N->getValueType(0), N->getOperand(1), N->getOperand(2), - N->getOperand(3), DAG.getCondCode(ISD::SETGT)); + return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N), + N->getValueType(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), DAG.getCondCode(ISD::SETGT)); break; + case Intrinsic::aarch64_sve_fcmpeq: case Intrinsic::aarch64_sve_cmpeq: - if (!N->getOperand(2).getValueType().isFloatingPoint()) - return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N), - N->getValueType(0), N->getOperand(1), N->getOperand(2), - N->getOperand(3), DAG.getCondCode(ISD::SETEQ)); + return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N), + N->getValueType(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), DAG.getCondCode(ISD::SETEQ)); break; + case Intrinsic::aarch64_sve_fcmpne: case Intrinsic::aarch64_sve_cmpne: - if (!N->getOperand(2).getValueType().isFloatingPoint()) - return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N), - N->getValueType(0), N->getOperand(1), N->getOperand(2), - N->getOperand(3), DAG.getCondCode(ISD::SETNE)); + return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N), + N->getValueType(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), DAG.getCondCode(ISD::SETNE)); + break; + case Intrinsic::aarch64_sve_fcmpuo: + return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N), + N->getValueType(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), DAG.getCondCode(ISD::SETUO)); break; case Intrinsic::aarch64_sve_fadda: return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1255,20 +1255,20 @@ defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, SETUGT>; defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, SETUGE>; - defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, SETOGE, SETGE, SETOLE, SETLE>; - defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, SETOGT, SETGT, SETOLT, SETLT>; - defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, SETOEQ, SETEQ, SETOEQ, SETEQ>; - defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, SETONE, SETNE, SETONE, SETNE>; - defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, SETUO, SETUO, SETUO, SETUO>; + defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", SETOGE, SETGE, SETOLE, SETLE>; + defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", SETOGT, SETGT, SETOLT, SETLT>; + defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>; + defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", SETONE, SETNE, SETONE, SETNE>; + defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", SETUO, SETUO, SETUO, SETUO>; defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>; defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>; - defm FCMGE_PPzZ0 : sve_fp_2op_p_pd<0b000, "fcmge">; - defm FCMGT_PPzZ0 : sve_fp_2op_p_pd<0b001, "fcmgt">; - defm FCMLT_PPzZ0 : sve_fp_2op_p_pd<0b010, "fcmlt">; - defm FCMLE_PPzZ0 : sve_fp_2op_p_pd<0b011, "fcmle">; - defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq">; - defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne">; + defm FCMGE_PPzZ0 : sve_fp_2op_p_pd<0b000, "fcmge", SETOGE, SETGE, SETOLE, SETLE>; + defm FCMGT_PPzZ0 : sve_fp_2op_p_pd<0b001, "fcmgt", SETOGT, SETGT, SETOLT, SETLT>; + defm FCMLT_PPzZ0 : sve_fp_2op_p_pd<0b010, "fcmlt", SETOLT, SETLT, SETOGT, SETGT>; + defm FCMLE_PPzZ0 : sve_fp_2op_p_pd<0b011, "fcmle", SETOLE, SETLE, SETOGE, SETGE>; + defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>; + defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne", SETONE, SETNE, SETONE, SETNE>; defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt>; defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -4394,6 +4394,14 @@ (cmp $Op1, $Op3, $Op2)>; } +multiclass SVE_SETCC_Pat_With_Zero { + def : Pat<(predvt (AArch64setcc_z predvt:$Op1, intvt:$Op2, (SVEDup0), cc)), + (cmp $Op1, $Op2)>; + def : Pat<(predvt (AArch64setcc_z predvt:$Op1, (SVEDup0), intvt:$Op2, invcc)), + (cmp $Op1, $Op2)>; +} + multiclass sve_int_cmp_0 opc, string asm, CondCode cc, CondCode invcc> { def _B : sve_int_cmp<0b0, 0b00, opc, asm, PPR8, ZPR8, ZPR8>; def _H : sve_int_cmp<0b0, 0b01, opc, asm, PPR16, ZPR16, ZPR16>; @@ -4754,10 +4762,13 @@ def : SVE_3_Op_Pat(NAME # _D)>; } -multiclass sve_fp_3op_p_pd_cc opc, string asm, SDPatternOperator op, +multiclass sve_fp_3op_p_pd_cc opc, string asm, CondCode cc1, CondCode cc2, - CondCode invcc1, CondCode invcc2> -: sve_fp_3op_p_pd { + CondCode invcc1, CondCode invcc2> { + def _H : sve_fp_3op_p_pd<0b01, opc, asm, PPR16, ZPR16>; + def _S : sve_fp_3op_p_pd<0b10, opc, asm, PPR32, ZPR32>; + def _D : sve_fp_3op_p_pd<0b11, opc, asm, PPR64, ZPR64>; + defm : SVE_SETCC_Pat(NAME # _H)>; defm : SVE_SETCC_Pat(NAME # _H)>; defm : SVE_SETCC_Pat(NAME # _H)>; @@ -4797,10 +4808,26 @@ let Inst{3-0} = Pd; } -multiclass sve_fp_2op_p_pd opc, string asm> { +multiclass sve_fp_2op_p_pd opc, string asm, + CondCode cc1, CondCode cc2, + CondCode invcc1, CondCode invcc2> { def _H : sve_fp_2op_p_pd<0b01, opc, asm, PPR16, ZPR16>; def _S : sve_fp_2op_p_pd<0b10, opc, asm, PPR32, ZPR32>; def _D : sve_fp_2op_p_pd<0b11, opc, asm, PPR64, ZPR64>; + + defm : SVE_SETCC_Pat_With_Zero(NAME # _H)>; + defm : SVE_SETCC_Pat_With_Zero(NAME # _H)>; + defm : SVE_SETCC_Pat_With_Zero(NAME # _H)>; + defm : SVE_SETCC_Pat_With_Zero(NAME # _S)>; + defm : SVE_SETCC_Pat_With_Zero(NAME # _S)>; + defm : SVE_SETCC_Pat_With_Zero(NAME # _D)>; + + defm : SVE_SETCC_Pat_With_Zero(NAME # _H)>; + defm : SVE_SETCC_Pat_With_Zero(NAME # _H)>; + defm : SVE_SETCC_Pat_With_Zero(NAME # _H)>; + defm : SVE_SETCC_Pat_With_Zero(NAME # _S)>; + defm : SVE_SETCC_Pat_With_Zero(NAME # _S)>; + defm : SVE_SETCC_Pat_With_Zero(NAME # _D)>; } diff --git a/llvm/test/CodeGen/AArch64/sve-fcmp.ll b/llvm/test/CodeGen/AArch64/sve-fcmp.ll --- a/llvm/test/CodeGen/AArch64/sve-fcmp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fcmp.ll @@ -308,3 +308,117 @@ %y = fcmp fast one %x, %x2 ret %y } +define @oeq_zero( %x) { +; CHECK-LABEL: oeq_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: ret + %y = fcmp oeq %x, zeroinitializer + ret %y +} +define @ogt_zero( %x) { +; CHECK-LABEL: ogt_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: ret + %y = fcmp ogt %x, zeroinitializer + ret %y +} +define @oge_zero( %x) { +; CHECK-LABEL: oge_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: ret + %y = fcmp oge %x, zeroinitializer + ret %y +} +define @olt_zero( %x) { +; CHECK-LABEL: olt_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmlt p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: ret + %y = fcmp olt %x, zeroinitializer + ret %y +} +define @ole_zero( %x) { +; CHECK-LABEL: ole_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmle p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: ret + %y = fcmp ole %x, zeroinitializer + ret %y +} +define @one_zero( %x) { +; CHECK-LABEL: one_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: ret + %y = fcmp one %x, zeroinitializer + ret %y +} +define @ueq_zero( %x) { +; CHECK-LABEL: ueq_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: ret + %y = fcmp ueq %x, zeroinitializer + ret %y +} +define @ugt_zero( %x) { +; CHECK-LABEL: ugt_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmle p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: ret + %y = fcmp ugt %x, zeroinitializer + ret %y +} +define @uge_zero( %x) { +; CHECK-LABEL: uge_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmlt p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: ret + %y = fcmp uge %x, zeroinitializer + ret %y +} +define @ult_zero( %x) { +; CHECK-LABEL: ult_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: ret + %y = fcmp ult %x, zeroinitializer + ret %y +} +define @ule_zero( %x) { +; CHECK-LABEL: ule_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: ret + %y = fcmp ule %x, zeroinitializer + ret %y +} +define @une_zero( %x) { +; CHECK-LABEL: une_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: ret + %y = fcmp une %x, zeroinitializer + ret %y +} diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll @@ -617,8 +617,7 @@ ; VBITS_GE_1024-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] ; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].d, vl16 ; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] -; VBITS_GE_1024-NEXT: mov [[ZERO:z[0-9]+]].h, #0 -; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]].h +; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0 ; VBITS_GE_1024-NEXT: ld1h { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] ; VBITS_GE_1024-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s ; VBITS_GE_1024-NEXT: uzp1 [[UZP2:z[0-9]+]].h, [[UZP1]].h, [[UZP1]].h @@ -638,8 +637,7 @@ ; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 ; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]].h +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0 ; VBITS_GE_2048-NEXT: ld1h { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] ; VBITS_GE_2048-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s ; VBITS_GE_2048-NEXT: uzp1 [[UZP2:z[0-9]+]].h, [[UZP1]].h, [[UZP1]].h @@ -702,8 +700,7 @@ ; VBITS_GE_512-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_512-NEXT: ptrue [[PG1:p[0-9]+]].d, vl8 ; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] -; VBITS_GE_512-NEXT: mov [[ZERO:z[0-9]+]].s, #0 -; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]].s +; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0 ; VBITS_GE_512-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] ; VBITS_GE_512-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s ; VBITS_GE_512-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] @@ -722,8 +719,7 @@ ; VBITS_GE_1024-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].d, vl16 ; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] -; VBITS_GE_1024-NEXT: mov [[ZERO:z[0-9]+]].s, #0 -; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]].s +; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0 ; VBITS_GE_1024-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] ; VBITS_GE_1024-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s ; VBITS_GE_1024-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] @@ -742,8 +738,7 @@ ; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 ; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]].s +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0 ; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] ; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s ; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] @@ -795,8 +790,7 @@ ; CHECK: ptrue [[PG0:p[0-9]+]].d, vl4 ; CHECK-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] ; CHECK-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] -; CHECK-NEXT: mov [[ZERO:z[0-9]+]].d, #0 -; CHECK-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]].d +; CHECK-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0.0 ; CHECK-NEXT: ld1d { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] ; CHECK-NEXT: st1d { [[RES]].d }, [[PG0]], [x0] ; CHECK-NEXT: ret @@ -813,8 +807,7 @@ ; VBITS_GE_512: ptrue [[PG0:p[0-9]+]].d, vl8 ; VBITS_GE_512-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] ; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] -; VBITS_GE_512-NEXT: mov [[ZERO:z[0-9]+]].d, #0 -; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]].d +; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0.0 ; VBITS_GE_512-NEXT: ld1d { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG0]], [x0] ; VBITS_GE_512-NEXT: ret @@ -831,8 +824,7 @@ ; VBITS_GE_1024: ptrue [[PG0:p[0-9]+]].d, vl16 ; VBITS_GE_1024-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] ; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] -; VBITS_GE_1024-NEXT: mov [[ZERO:z[0-9]+]].d, #0 -; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]].d +; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0.0 ; VBITS_GE_1024-NEXT: ld1d { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG0]], [x0] ; VBITS_GE_1024-NEXT: ret @@ -849,8 +841,7 @@ ; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].d, vl32 ; VBITS_GE_2048-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].d, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]].d +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0.0 ; VBITS_GE_2048-NEXT: ld1d { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG0]], [x0] ; VBITS_GE_2048-NEXT: ret @@ -871,8 +862,7 @@ ; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32 ; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0 ; VBITS_GE_2048-NEXT: ld1h { [[RES:z[0-9]+]].s }, [[MASK]]/z, [x2, [[PTRS]].s, sxtw #1] ; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].h, [[RES]].h, [[RES]].h ; VBITS_GE_2048-NEXT: st1h { [[UZP]].h }, [[PG0]], [x0] @@ -893,8 +883,7 @@ ; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32 ; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0 ; VBITS_GE_2048-NEXT: ld1h { [[RES:z[0-9]+]].s }, [[MASK]]/z, [x2, [[PTRS]].s, uxtw #1] ; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].h, [[RES]].h, [[RES]].h ; VBITS_GE_2048-NEXT: st1h { [[UZP]].h }, [[PG0]], [x0] @@ -915,8 +904,7 @@ ; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32 ; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0 ; VBITS_GE_2048-NEXT: ld1h { [[RES:z[0-9]+]].s }, [[MASK]]/z, [x2, [[PTRS]].s, sxtw] ; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].h, [[RES]].h, [[RES]].h ; VBITS_GE_2048-NEXT: st1h { [[UZP]].h }, [[PG0]], [x0] @@ -938,8 +926,7 @@ ; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32 ; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0 ; VBITS_GE_2048-NEXT: ld1h { [[RES:z[0-9]+]].s }, [[MASK]]/z, [x2, [[PTRS]].s, uxtw] ; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].h, [[RES]].h, [[RES]].h ; VBITS_GE_2048-NEXT: st1h { [[UZP]].h }, [[PG0]], [x0] @@ -961,8 +948,7 @@ ; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 ; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0 ; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, [x2, [[PTRS]].d, lsl #2] ; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s ; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] @@ -982,8 +968,7 @@ ; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 ; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0 ; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, [x2, [[PTRS]].d] ; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s ; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] @@ -1006,9 +991,8 @@ ; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] ; VBITS_GE_2048-NEXT: mov [[OFF:z[0-9]+]].d, x2 -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 ; VBITS_GE_2048-NEXT: add [[PTRS_ADD:z[0-9]+]].d, [[PG1]]/m, [[PTRS]].d, [[OFF]].d -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0 ; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS_ADD]].d] ; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s ; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] @@ -1031,9 +1015,8 @@ ; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] ; VBITS_GE_2048-NEXT: mov [[OFF:z[0-9]+]].d, #4 -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 ; VBITS_GE_2048-NEXT: add [[PTRS_ADD:z[0-9]+]].d, [[PG1]]/m, [[PTRS]].d, [[OFF]].d -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0 ; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS_ADD]].d] ; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s ; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] @@ -1054,9 +1037,8 @@ ; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 ; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] ; VBITS_GE_2048-NEXT: ld1w { [[PT:z[0-9]+]].s }, [[PG0]]/z, [x2] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0 ; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] ; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s ; VBITS_GE_2048-NEXT: sel [[SEL:z[0-9]+]].s, [[PG1]], [[UZP]].s, [[PT]].s @@ -1077,8 +1059,7 @@ ; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 ; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0 ; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] ; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s ; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll @@ -562,8 +562,7 @@ ; VBITS_GE_1024-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] ; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].d, vl16 ; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] -; VBITS_GE_1024-NEXT: mov [[ZERO:z[0-9]+]].h, #0 -; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0 ; VBITS_GE_1024-NEXT: uunpklo [[UPK1:z[0-9]+]].s, [[VALS]].h ; VBITS_GE_1024-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s ; VBITS_GE_1024-NEXT: st1h { [[UPK2]].d }, [[MASK]], {{\[}}[[PTRS]].d] @@ -581,8 +580,7 @@ ; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 ; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo [[UPK1:z[0-9]+]].s, [[VALS]].h ; VBITS_GE_2048-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s ; VBITS_GE_2048-NEXT: st1h { [[UPK2]].d }, [[MASK]], {{\[}}[[PTRS]].d] @@ -639,8 +637,7 @@ ; VBITS_GE_512-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_512-NEXT: ptrue [[PG1:p[0-9]+]].d, vl8 ; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] -; VBITS_GE_512-NEXT: mov [[ZERO:z[0-9]+]].s, #0 -; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0 ; VBITS_GE_512-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s ; VBITS_GE_512-NEXT: st1w { [[UPK]].d }, [[MASK]], {{\[}}[[PTRS]].d] ; VBITS_GE_512-NEXT: ret @@ -657,8 +654,7 @@ ; VBITS_GE_1024-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].d, vl16 ; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] -; VBITS_GE_1024-NEXT: mov [[ZERO:z[0-9]+]].s, #0 -; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0 ; VBITS_GE_1024-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s ; VBITS_GE_1024-NEXT: st1w { [[UPK]].d }, [[MASK]], {{\[}}[[PTRS]].d] ; VBITS_GE_1024-NEXT: ret @@ -675,8 +671,7 @@ ; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 ; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0 ; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s ; VBITS_GE_2048-NEXT: st1w { [[UPK]].d }, [[MASK]], {{\[}}[[PTRS]].d] ; VBITS_GE_2048-NEXT: ret @@ -723,8 +718,7 @@ ; CHECK: ptrue [[PG0:p[0-9]+]].d, vl4 ; CHECK-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] ; CHECK-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] -; CHECK-NEXT: mov [[ZERO:z[0-9]+]].d, #0 -; CHECK-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]].d +; CHECK-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0.0 ; CHECK-NEXT: st1d { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS]].d] ; CHECK-NEXT: ret %vals = load <4 x double>, <4 x double>* %a @@ -739,8 +733,7 @@ ; VBITS_GE_512: ptrue [[PG0:p[0-9]+]].d, vl8 ; VBITS_GE_512-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] ; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] -; VBITS_GE_512-NEXT: mov [[ZERO:z[0-9]+]].d, #0 -; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]] +; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0.0 ; VBITS_GE_512-NEXT: st1d { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS]].d] ; VBITS_GE_512-NEXT: ret %vals = load <8 x double>, <8 x double>* %a @@ -755,8 +748,7 @@ ; VBITS_GE_1024: ptrue [[PG0:p[0-9]+]].d, vl16 ; VBITS_GE_1024-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] ; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] -; VBITS_GE_1024-NEXT: mov [[ZERO:z[0-9]+]].d, #0 -; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]] +; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0.0 ; VBITS_GE_1024-NEXT: st1d { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS]].d] ; VBITS_GE_1024-NEXT: ret %vals = load <16 x double>, <16 x double>* %a @@ -771,8 +763,7 @@ ; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].d, vl32 ; VBITS_GE_2048-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].d, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0.0 ; VBITS_GE_2048-NEXT: st1d { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS]].d] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x double>, <32 x double>* %a @@ -791,8 +782,7 @@ ; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32 ; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].s, [[VALS]].h ; VBITS_GE_2048-NEXT: st1h { [[VALS]].s }, [[MASK]], [x2, [[PTRS]].s, sxtw #1] ; VBITS_GE_2048-NEXT: ret @@ -811,8 +801,7 @@ ; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32 ; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].s, [[VALS]].h ; VBITS_GE_2048-NEXT: st1h { [[VALS]].s }, [[MASK]], [x2, [[PTRS]].s, uxtw #1] ; VBITS_GE_2048-NEXT: ret @@ -831,8 +820,7 @@ ; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32 ; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].s, [[VALS]].h ; VBITS_GE_2048-NEXT: st1h { [[VALS]].s }, [[MASK]], [x2, [[PTRS]].s, sxtw] ; VBITS_GE_2048-NEXT: ret @@ -852,8 +840,7 @@ ; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32 ; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].s, [[VALS]].h ; VBITS_GE_2048-NEXT: st1h { [[VALS]].s }, [[MASK]], [x2, [[PTRS]].s, uxtw] ; VBITS_GE_2048-NEXT: ret @@ -873,8 +860,7 @@ ; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 ; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0 ; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s ; VBITS_GE_2048-NEXT: st1w { [[VALS]].d }, [[MASK]], [x2, [[PTRS]].d, lsl #2] ; VBITS_GE_2048-NEXT: ret @@ -892,8 +878,7 @@ ; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 ; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0 ; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s ; VBITS_GE_2048-NEXT: st1w { [[VALS]].d }, [[MASK]], [x2, [[PTRS]].d] ; VBITS_GE_2048-NEXT: ret @@ -914,8 +899,7 @@ ; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] ; VBITS_GE_2048-NEXT: mov [[OFF:z[0-9]+]].d, x2 -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0 ; VBITS_GE_2048-NEXT: add [[PTRS_ADD:z[0-9]+]].d, [[PG1]]/m, [[PTRS]].d, [[OFF]].d ; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s ; VBITS_GE_2048-NEXT: st1w { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS_ADD]].d] @@ -937,8 +921,7 @@ ; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] ; VBITS_GE_2048-NEXT: mov [[OFF:z[0-9]+]].d, #4 -; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 -; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0.0 ; VBITS_GE_2048-NEXT: add [[PTRS_ADD:z[0-9]+]].d, [[PG1]]/m, [[PTRS]].d, [[OFF]].d ; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s ; VBITS_GE_2048-NEXT: st1w { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS_ADD]].d] diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-compares.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-compares.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-compares.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-compares.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s ; @@ -6,8 +7,9 @@ define @facge_h( %pg, %a, %b) { ; CHECK-LABEL: facge_h: -; CHECK: facge p0.h, p0/z, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: facge p0.h, p0/z, z0.h, z1.h +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.facge.nxv8f16( %pg, %a, %b) @@ -16,8 +18,9 @@ define @facge_s( %pg, %a, %b) { ; CHECK-LABEL: facge_s: -; CHECK: facge p0.s, p0/z, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: facge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.facge.nxv4f32( %pg, %a, %b) @@ -26,8 +29,9 @@ define @facge_d( %pg, %a, %b) { ; CHECK-LABEL: facge_d: -; CHECK: facge p0.d, p0/z, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: facge p0.d, p0/z, z0.d, z1.d +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.facge.nxv2f64( %pg, %a, %b) @@ -40,8 +44,9 @@ define @facgt_h( %pg, %a, %b) { ; CHECK-LABEL: facgt_h: -; CHECK: facgt p0.h, p0/z, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: facgt p0.h, p0/z, z0.h, z1.h +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.facgt.nxv8f16( %pg, %a, %b) @@ -50,8 +55,9 @@ define @facgt_s( %pg, %a, %b) { ; CHECK-LABEL: facgt_s: -; CHECK: facgt p0.s, p0/z, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: facgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.facgt.nxv4f32( %pg, %a, %b) @@ -60,8 +66,9 @@ define @facgt_d( %pg, %a, %b) { ; CHECK-LABEL: facgt_d: -; CHECK: facgt p0.d, p0/z, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: facgt p0.d, p0/z, z0.d, z1.d +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.facgt.nxv2f64( %pg, %a, %b) @@ -74,8 +81,9 @@ define @fcmeq_h( %pg, %a, %b) { ; CHECK-LABEL: fcmeq_h: -; CHECK: fcmeq p0.h, p0/z, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, z1.h +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.fcmpeq.nxv8f16( %pg, %a, %b) @@ -84,8 +92,9 @@ define @fcmeq_s( %pg, %a, %b) { ; CHECK-LABEL: fcmeq_s: -; CHECK: fcmeq p0.s, p0/z, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.fcmpeq.nxv4f32( %pg, %a, %b) @@ -94,22 +103,35 @@ define @fcmeq_d( %pg, %a, %b) { ; CHECK-LABEL: fcmeq_d: -; CHECK: fcmeq p0.d, p0/z, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, z1.d +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.fcmpeq.nxv2f64( %pg, %a, %b) ret %out } +define @fcmeq_zero( %pg, %a) { +; CHECK-LABEL: fcmeq_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcmpeq.nxv2f64( %pg, + %a, + zeroinitializer) + ret %out +} + ; ; FCMGE ; define @fcmge_h( %pg, %a, %b) { ; CHECK-LABEL: fcmge_h: -; CHECK: fcmge p0.h, p0/z, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fcmge p0.h, p0/z, z0.h, z1.h +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.fcmpge.nxv8f16( %pg, %a, %b) @@ -118,8 +140,9 @@ define @fcmge_s( %pg, %a, %b) { ; CHECK-LABEL: fcmge_s: -; CHECK: fcmge p0.s, p0/z, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.fcmpge.nxv4f32( %pg, %a, %b) @@ -128,22 +151,34 @@ define @fcmge_d( %pg, %a, %b) { ; CHECK-LABEL: fcmge_d: -; CHECK: fcmge p0.d, p0/z, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fcmge p0.d, p0/z, z0.d, z1.d +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.fcmpge.nxv2f64( %pg, %a, %b) ret %out } +define @fcmge_zero( %pg, %a) { +; CHECK-LABEL: fcmge_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmge p0.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcmpge.nxv2f64( %pg, + %a, + zeroinitializer) + ret %out +} ; ; FCMGT ; define @fcmgt_h( %pg, %a, %b) { ; CHECK-LABEL: fcmgt_h: -; CHECK: fcmgt p0.h, p0/z, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.fcmpgt.nxv8f16( %pg, %a, %b) @@ -152,8 +187,9 @@ define @fcmgt_s( %pg, %a, %b) { ; CHECK-LABEL: fcmgt_s: -; CHECK: fcmgt p0.s, p0/z, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.fcmpgt.nxv4f32( %pg, %a, %b) @@ -162,22 +198,34 @@ define @fcmgt_d( %pg, %a, %b) { ; CHECK-LABEL: fcmgt_d: -; CHECK: fcmgt p0.d, p0/z, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z1.d +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.fcmpgt.nxv2f64( %pg, %a, %b) ret %out } +define @fcmgt_zero( %pg, %a) { +; CHECK-LABEL: fcmgt_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcmpgt.nxv2f64( %pg, + %a, + zeroinitializer) + ret %out +} ; ; FCMNE ; define @fcmne_h( %pg, %a, %b) { ; CHECK-LABEL: fcmne_h: -; CHECK: fcmne p0.h, p0/z, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fcmne p0.h, p0/z, z0.h, z1.h +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.fcmpne.nxv8f16( %pg, %a, %b) @@ -186,8 +234,9 @@ define @fcmne_s( %pg, %a, %b) { ; CHECK-LABEL: fcmne_s: -; CHECK: fcmne p0.s, p0/z, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.fcmpne.nxv4f32( %pg, %a, %b) @@ -196,22 +245,35 @@ define @fcmne_d( %pg, %a, %b) { ; CHECK-LABEL: fcmne_d: -; CHECK: fcmne p0.d, p0/z, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fcmne p0.d, p0/z, z0.d, z1.d +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.fcmpne.nxv2f64( %pg, %a, %b) ret %out } +define @fcmne_zero( %pg, %a) { +; CHECK-LABEL: fcmne_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmne p0.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcmpne.nxv2f64( %pg, + %a, + zeroinitializer) + ret %out +} + ; ; FCMPUO ; define @fcmuo_h( %pg, %a, %b) { ; CHECK-LABEL: fcmuo_h: -; CHECK: fcmuo p0.h, p0/z, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z1.h +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.fcmpuo.nxv8f16( %pg, %a, %b) @@ -220,8 +282,9 @@ define @fcmuo_s( %pg, %a, %b) { ; CHECK-LABEL: fcmuo_s: -; CHECK: fcmuo p0.s, p0/z, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.fcmpuo.nxv4f32( %pg, %a, %b) @@ -230,8 +293,9 @@ define @fcmuo_d( %pg, %a, %b) { ; CHECK-LABEL: fcmuo_d: -; CHECK: fcmuo p0.d, p0/z, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z1.d +; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.fcmpuo.nxv2f64( %pg, %a, %b)