Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -695,6 +695,9 @@ bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override; + bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, + EVT VT) const override; + /// Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -15319,6 +15319,11 @@ return true; } +bool AArch64TargetLowering::shouldFoldSelectWithIdentityConstant( + unsigned BinOpcode, EVT VT) const { + return Subtarget->hasSVE() && VT.isScalableVector() && isTypeLegal(VT); +} + bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -401,19 +401,15 @@ (sub node:$op2, node:$op1)>; def AArch64add_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2), [(int_aarch64_sve_add node:$pred, node:$op1, node:$op2), - (vselect node:$pred, (add node:$op1, node:$op2), node:$op1), - (add node:$op1, (vselect node:$pred, node:$op2, (SVEDup0)))]>; + (vselect node:$pred, (add node:$op1, node:$op2), node:$op1)]>; def AArch64sub_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2), [(int_aarch64_sve_sub node:$pred, node:$op1, node:$op2), - (vselect node:$pred, (sub node:$op1, node:$op2), node:$op1), - (sub node:$op1, (vselect node:$pred, node:$op2, (SVEDup0)))]>; + (vselect node:$pred, (sub node:$op1, node:$op2), node:$op1)]>; def AArch64mul_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2), [(int_aarch64_sve_mul node:$pred, node:$op1, node:$op2), (vselect node:$pred, (AArch64mul_p (AArch64ptrue 31), node:$op1, node:$op2), node:$op1)]>; def AArch64mla_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3), [(int_aarch64_sve_mla node:$pred, node:$op1, node:$op2, node:$op3), - // add(a, select(mask, mul(b, c), splat(0))) -> mla(a, mask, b, c) - (add node:$op1, (vselect node:$pred, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3), (SVEDup0))), // select(mask, add(a, mul(b, c))) -> mla(a, mask, b, c) (vselect node:$pred, (add node:$op1, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3)), node:$op1)]>; // pattern for generating pseudo for MLA_ZPmZZ/MAD_ZPmZZ @@ -421,8 +417,6 @@ [(add node:$op1, (AArch64mul_p_oneuse node:$pred, node:$op2, node:$op3))]>; def AArch64mls_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3), [(int_aarch64_sve_mls node:$pred, node:$op1, node:$op2, node:$op3), - // sub(a, select(mask, mul(b, c), splat(0))) -> mls(a, mask, b, c) - (sub node:$op1, (vselect node:$pred, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3), (SVEDup0))), // select(mask, sub(a, mul(b, c))) -> mls(a, mask, b, c) (vselect node:$pred, (sub node:$op1, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3)), node:$op1)]>; def AArch64mls_p : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3), @@ -440,20 +434,18 @@ [(int_aarch64_sve_eor3 node:$op1, node:$op2, node:$op3), (xor node:$op1, (xor node:$op2, node:$op3))]>; -class fma_patfrags +class fma_patfrags : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3), [(intrinsic node:$pred, node:$op1, node:$op2, node:$op3), - (add_zero (SVEAllActive), node:$op1, (vselect node:$pred, (AArch64fmul_p_oneuse (SVEAllActive), node:$op2, node:$op3), (SVEDup0))), - (add_negzero (SVEAllActive), node:$op1, (vselect node:$pred, (AArch64fmul_p_oneuse (SVEAllActive), node:$op2, node:$op3), (SVEDupNeg0)))], - [{ - if ((N->getOpcode() != AArch64ISD::FADD_PRED) && - (N->getOpcode() != AArch64ISD::FSUB_PRED)) - return true; // it's the intrinsic - return N->getFlags().hasAllowContract(); + (vselect node:$pred, (add (SVEAllActive), node:$op1, (AArch64fmul_p_oneuse (SVEAllActive), node:$op2, node:$op3)), node:$op1)], +[{ + if (N->getOpcode() == ISD::VSELECT) + return N->getOperand(1)->getFlags().hasAllowContract(); + return true; // it's the intrinsic }]>; -def AArch64fmla_m1 : fma_patfrags; -def AArch64fmls_m1 : fma_patfrags; +def AArch64fmla_m1 : fma_patfrags; +def AArch64fmls_m1 : fma_patfrags; def AArch64smax_m1 : EitherVSelectOrPassthruPatFrags; def AArch64umax_m1 : EitherVSelectOrPassthruPatFrags; Index: llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll +++ llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll @@ -5,10 +5,10 @@ ; CHECK-LABEL: scalable_int_min_max: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w8, #3745 +; CHECK-NEXT: mov w8, #3745 // =0xea1 ; CHECK-NEXT: movk w8, #16618, lsl #16 ; CHECK-NEXT: ld1w { z3.d }, p0/z, [x0] -; CHECK-NEXT: mov w9, #57344 +; CHECK-NEXT: mov w9, #57344 // =0xe000 ; CHECK-NEXT: mov z6.d, #1023 // =0x3ff ; CHECK-NEXT: movk w9, #17535, lsl #16 ; CHECK-NEXT: mov z4.s, w8 @@ -27,10 +27,11 @@ ; CHECK-NEXT: ld1w { z0.d }, p1/z, [z0.d] ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z4.s ; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z3.s +; CHECK-NEXT: add z0.d, z2.d, z1.d ; CHECK-NEXT: bic p2.b, p1/z, p1.b, p2.b -; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 -; CHECK-NEXT: add z2.d, p1/m, z2.d, z1.d -; CHECK-NEXT: uaddv d0, p0, z2.d +; CHECK-NEXT: mov z0.d, p2/m, z2.d +; CHECK-NEXT: sel z0.d, p1, z0.d, z2.d +; CHECK-NEXT: uaddv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret entry: Index: llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll +++ llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll @@ -118,9 +118,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0 -; CHECK-NEXT: mov z2.d, #1 // =0x1 -; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d -; CHECK-NEXT: mul z0.d, z1.d, z0.d +; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -134,9 +132,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0 -; CHECK-NEXT: mov z2.s, #1 // =0x1 -; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s -; CHECK-NEXT: mul z0.s, z1.s, z0.s +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -150,9 +146,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0 -; CHECK-NEXT: mov z2.h, #1 // =0x1 -; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h -; CHECK-NEXT: mul z0.h, z1.h, z0.h +; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -166,9 +160,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0 -; CHECK-NEXT: mov z2.b, #1 // =0x1 -; CHECK-NEXT: sel z1.b, p0, z1.b, z2.b -; CHECK-NEXT: mul z0.b, z1.b, z0.b +; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -182,9 +174,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0 -; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d -; CHECK-NEXT: and z0.d, z1.d, z0.d +; CHECK-NEXT: and z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -198,9 +188,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0 -; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s -; CHECK-NEXT: and z0.d, z1.d, z0.d +; CHECK-NEXT: and z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -214,9 +202,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0 -; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h -; CHECK-NEXT: and z0.d, z1.d, z0.d +; CHECK-NEXT: and z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -230,9 +216,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0 -; CHECK-NEXT: mov z2.b, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sel z1.b, p0, z1.b, z2.b -; CHECK-NEXT: and z0.d, z1.d, z0.d +; CHECK-NEXT: and z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -246,9 +230,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0 -; CHECK-NEXT: mov z2.d, #0 // =0x0 -; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d -; CHECK-NEXT: orr z0.d, z1.d, z0.d +; CHECK-NEXT: orr z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -262,9 +244,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0 -; CHECK-NEXT: mov z2.s, #0 // =0x0 -; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s -; CHECK-NEXT: orr z0.d, z1.d, z0.d +; CHECK-NEXT: orr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -278,9 +258,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0 -; CHECK-NEXT: mov z2.h, #0 // =0x0 -; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h -; CHECK-NEXT: orr z0.d, z1.d, z0.d +; CHECK-NEXT: orr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -294,9 +272,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0 -; CHECK-NEXT: mov z2.b, #0 // =0x0 -; CHECK-NEXT: sel z1.b, p0, z1.b, z2.b -; CHECK-NEXT: orr z0.d, z1.d, z0.d +; CHECK-NEXT: orr z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -310,9 +286,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0 -; CHECK-NEXT: mov z2.d, #0 // =0x0 -; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d -; CHECK-NEXT: eor z0.d, z1.d, z0.d +; CHECK-NEXT: eor z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -326,9 +300,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0 -; CHECK-NEXT: mov z2.s, #0 // =0x0 -; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s -; CHECK-NEXT: eor z0.d, z1.d, z0.d +; CHECK-NEXT: eor z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -342,9 +314,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0 -; CHECK-NEXT: mov z2.h, #0 // =0x0 -; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h -; CHECK-NEXT: eor z0.d, z1.d, z0.d +; CHECK-NEXT: eor z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -358,9 +328,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0 -; CHECK-NEXT: mov z2.b, #0 // =0x0 -; CHECK-NEXT: sel z1.b, p0, z1.b, z2.b -; CHECK-NEXT: eor z0.d, z1.d, z0.d +; CHECK-NEXT: eor z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -374,9 +342,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0 -; CHECK-NEXT: mov z2.d, #0 // =0x0 -; CHECK-NEXT: sel z1.d, p1, z1.d, z2.d -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: lslr z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z0.d, p1/m, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -390,9 +357,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0 -; CHECK-NEXT: mov z2.s, #0 // =0x0 -; CHECK-NEXT: sel z1.s, p1, z1.s, z2.s -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: lslr z1.s, p0/m, z1.s, z0.s +; CHECK-NEXT: mov z0.s, p1/m, z1.s ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -406,9 +372,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0 -; CHECK-NEXT: mov z2.h, #0 // =0x0 -; CHECK-NEXT: sel z1.h, p1, z1.h, z2.h -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: lslr z1.h, p0/m, z1.h, z0.h +; CHECK-NEXT: mov z0.h, p1/m, z1.h ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -422,9 +387,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0 -; CHECK-NEXT: mov z2.b, #0 // =0x0 -; CHECK-NEXT: sel z1.b, p1, z1.b, z2.b -; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: lslr z1.b, p0/m, z1.b, z0.b +; CHECK-NEXT: mov z0.b, p1/m, z1.b ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -438,9 +402,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0 -; CHECK-NEXT: mov z2.d, #0 // =0x0 -; CHECK-NEXT: sel z1.d, p1, z1.d, z2.d -; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: asrr z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z0.d, p1/m, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -454,9 +417,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0 -; CHECK-NEXT: mov z2.s, #0 // =0x0 -; CHECK-NEXT: sel z1.s, p1, z1.s, z2.s -; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: asrr z1.s, p0/m, z1.s, z0.s +; CHECK-NEXT: mov z0.s, p1/m, z1.s ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -470,9 +432,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0 -; CHECK-NEXT: mov z2.h, #0 // =0x0 -; CHECK-NEXT: sel z1.h, p1, z1.h, z2.h -; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: asrr z1.h, p0/m, z1.h, z0.h +; CHECK-NEXT: mov z0.h, p1/m, z1.h ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -486,9 +447,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0 -; CHECK-NEXT: mov z2.b, #0 // =0x0 -; CHECK-NEXT: sel z1.b, p1, z1.b, z2.b -; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: asrr z1.b, p0/m, z1.b, z0.b +; CHECK-NEXT: mov z0.b, p1/m, z1.b ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -502,9 +462,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0 -; CHECK-NEXT: mov z2.d, #0 // =0x0 -; CHECK-NEXT: sel z1.d, p1, z1.d, z2.d -; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: lsrr z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z0.d, p1/m, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -518,9 +477,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0 -; CHECK-NEXT: mov z2.s, #0 // =0x0 -; CHECK-NEXT: sel z1.s, p1, z1.s, z2.s -; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: lsrr z1.s, p0/m, z1.s, z0.s +; CHECK-NEXT: mov z0.s, p1/m, z1.s ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -534,9 +492,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0 -; CHECK-NEXT: mov z2.h, #0 // =0x0 -; CHECK-NEXT: sel z1.h, p1, z1.h, z2.h -; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: lsrr z1.h, p0/m, z1.h, z0.h +; CHECK-NEXT: mov z0.h, p1/m, z1.h ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -550,9 +507,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0 -; CHECK-NEXT: mov z2.b, #0 // =0x0 -; CHECK-NEXT: sel z1.b, p1, z1.b, z2.b -; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: lsrr z1.b, p0/m, z1.b, z0.b +; CHECK-NEXT: mov z0.b, p1/m, z1.b ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -688,13 +644,10 @@ define @fadd_nxv4f32_x( %x, %y, %n) { ; CHECK-LABEL: fadd_nxv4f32_x: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0 ; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s -; CHECK-NEXT: fadd z0.s, z1.s, z0.s +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret entry: %c = fcmp ugt %n, zeroinitializer @@ -706,13 +659,10 @@ define @fadd_nxv8f16_x( %x, %y, %n) { ; CHECK-LABEL: fadd_nxv8f16_x: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #32768 // =0x8000 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0 ; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h -; CHECK-NEXT: fadd z0.h, z1.h, z0.h +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret entry: %c = fcmp ugt %n, zeroinitializer @@ -724,13 +674,10 @@ define @fadd_nxv2f64_x( %x, %y, %n) { ; CHECK-LABEL: fadd_nxv2f64_x: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0 ; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d -; CHECK-NEXT: fadd z0.d, z1.d, z0.d +; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret entry: %c = fcmp ugt %n, zeroinitializer @@ -789,10 +736,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0 -; CHECK-NEXT: fmov z2.s, #1.00000000 ; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s -; CHECK-NEXT: fmul z0.s, z1.s, z0.s +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret entry: %c = fcmp ugt %n, zeroinitializer @@ -806,10 +751,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0 -; CHECK-NEXT: fmov z2.h, #1.00000000 ; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h -; CHECK-NEXT: fmul z0.h, z1.h, z0.h +; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret entry: %c = fcmp ugt %n, zeroinitializer @@ -823,10 +766,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0 -; CHECK-NEXT: fmov z2.d, #1.00000000 ; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d -; CHECK-NEXT: fmul z0.d, z1.d, z0.d +; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret entry: %c = fcmp ugt %n, zeroinitializer @@ -840,10 +781,9 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0 -; CHECK-NEXT: fmov z2.s, #1.00000000 +; CHECK-NEXT: fdivr z1.s, p0/m, z1.s, z0.s ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: sel z1.s, p1, z1.s, z2.s -; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: mov z0.s, p1/m, z1.s ; CHECK-NEXT: ret entry: %c = fcmp ugt %n, zeroinitializer @@ -857,10 +797,9 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0 -; CHECK-NEXT: fmov z2.h, #1.00000000 +; CHECK-NEXT: fdivr z1.h, p0/m, z1.h, z0.h ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: sel z1.h, p1, z1.h, z2.h -; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: mov z0.h, p1/m, z1.h ; CHECK-NEXT: ret entry: %c = fcmp ugt %n, zeroinitializer @@ -874,10 +813,9 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0 -; CHECK-NEXT: fmov z2.d, #1.00000000 +; CHECK-NEXT: fdivr z1.d, p0/m, z1.d, z0.d ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: sel z1.d, p1, z1.d, z2.d -; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: mov z0.d, p1/m, z1.d ; CHECK-NEXT: ret entry: %c = fcmp ugt %n, zeroinitializer @@ -889,14 +827,10 @@ define @fma_nxv4f32_x( %x, %y, %z, %n) { ; CHECK-LABEL: fma_nxv4f32_x: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: fcmle p1.s, p0/z, z3.s, #0.0 -; CHECK-NEXT: fmul z1.s, z1.s, z2.s ; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s -; CHECK-NEXT: fadd z0.s, z1.s, z0.s +; CHECK-NEXT: fmla z0.s, p0/m, z1.s, z2.s ; CHECK-NEXT: ret entry: %c = fcmp ugt %n, zeroinitializer @@ -909,14 +843,10 @@ define @fma_nxv8f16_x( %x, %y, %z, %n) { ; CHECK-LABEL: fma_nxv8f16_x: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #32768 // =0x8000 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: fcmle p1.h, p0/z, z3.h, #0.0 -; CHECK-NEXT: fmul z1.h, z1.h, z2.h ; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h -; CHECK-NEXT: fadd z0.h, z1.h, z0.h +; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: ret entry: %c = fcmp ugt %n, zeroinitializer @@ -929,14 +859,10 @@ define @fma_nxv2f64_x( %x, %y, %z, %n) { ; CHECK-LABEL: fma_nxv2f64_x: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: fcmle p1.d, p0/z, z3.d, #0.0 -; CHECK-NEXT: fmul z1.d, z1.d, z2.d ; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d -; CHECK-NEXT: fadd z0.d, z1.d, z0.d +; CHECK-NEXT: fmla z0.d, p0/m, z1.d, z2.d ; CHECK-NEXT: ret entry: %c = fcmp ugt %n, zeroinitializer @@ -1071,9 +997,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0 -; CHECK-NEXT: mov z2.d, #1 // =0x1 -; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d -; CHECK-NEXT: mul z0.d, z0.d, z1.d +; CHECK-NEXT: mul z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -1087,9 +1012,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0 -; CHECK-NEXT: mov z2.s, #1 // =0x1 -; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s -; CHECK-NEXT: mul z0.s, z0.s, z1.s +; CHECK-NEXT: mul z1.s, p0/m, z1.s, z0.s +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -1103,9 +1027,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0 -; CHECK-NEXT: mov z2.h, #1 // =0x1 -; CHECK-NEXT: sel z0.h, p0, z0.h, z2.h -; CHECK-NEXT: mul z0.h, z0.h, z1.h +; CHECK-NEXT: mul z1.h, p0/m, z1.h, z0.h +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -1119,9 +1042,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0 -; CHECK-NEXT: mov z2.b, #1 // =0x1 -; CHECK-NEXT: sel z0.b, p0, z0.b, z2.b -; CHECK-NEXT: mul z0.b, z0.b, z1.b +; CHECK-NEXT: mul z1.b, p0/m, z1.b, z0.b +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -1135,9 +1057,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0 -; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d -; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: and z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -1151,9 +1072,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0 -; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s -; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: and z1.s, p0/m, z1.s, z0.s +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -1167,9 +1087,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0 -; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sel z0.h, p0, z0.h, z2.h -; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: and z1.h, p0/m, z1.h, z0.h +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -1183,9 +1102,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0 -; CHECK-NEXT: mov z2.b, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sel z0.b, p0, z0.b, z2.b -; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: and z1.b, p0/m, z1.b, z0.b +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -1199,9 +1117,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0 -; CHECK-NEXT: mov z2.d, #0 // =0x0 -; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d -; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: orr z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -1215,9 +1132,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0 -; CHECK-NEXT: mov z2.s, #0 // =0x0 -; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s -; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: orr z1.s, p0/m, z1.s, z0.s +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -1231,9 +1147,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0 -; CHECK-NEXT: mov z2.h, #0 // =0x0 -; CHECK-NEXT: sel z0.h, p0, z0.h, z2.h -; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: orr z1.h, p0/m, z1.h, z0.h +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -1247,9 +1162,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0 -; CHECK-NEXT: mov z2.b, #0 // =0x0 -; CHECK-NEXT: sel z0.b, p0, z0.b, z2.b -; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: orr z1.b, p0/m, z1.b, z0.b +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -1263,9 +1177,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0 -; CHECK-NEXT: mov z2.d, #0 // =0x0 -; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d -; CHECK-NEXT: eor z0.d, z0.d, z1.d +; CHECK-NEXT: eor z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -1279,9 +1192,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0 -; CHECK-NEXT: mov z2.s, #0 // =0x0 -; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s -; CHECK-NEXT: eor z0.d, z0.d, z1.d +; CHECK-NEXT: eor z1.s, p0/m, z1.s, z0.s +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -1295,9 +1207,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0 -; CHECK-NEXT: mov z2.h, #0 // =0x0 -; CHECK-NEXT: sel z0.h, p0, z0.h, z2.h -; CHECK-NEXT: eor z0.d, z0.d, z1.d +; CHECK-NEXT: eor z1.h, p0/m, z1.h, z0.h +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -1311,9 +1222,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0 -; CHECK-NEXT: mov z2.b, #0 // =0x0 -; CHECK-NEXT: sel z0.b, p0, z0.b, z2.b -; CHECK-NEXT: eor z0.d, z0.d, z1.d +; CHECK-NEXT: eor z1.b, p0/m, z1.b, z0.b +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = icmp sgt %n, zeroinitializer @@ -1633,13 +1543,11 @@ define @fadd_nxv4f32_y( %x, %y, %n) { ; CHECK-LABEL: fadd_nxv4f32_y: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0 ; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s -; CHECK-NEXT: fadd z0.s, z0.s, z1.s +; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z0.s +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = fcmp ugt %n, zeroinitializer @@ -1651,13 +1559,11 @@ define @fadd_nxv8f16_y( %x, %y, %n) { ; CHECK-LABEL: fadd_nxv8f16_y: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #32768 // =0x8000 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0 ; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: sel z0.h, p0, z0.h, z2.h -; CHECK-NEXT: fadd z0.h, z0.h, z1.h +; CHECK-NEXT: fadd z1.h, p0/m, z1.h, z0.h +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = fcmp ugt %n, zeroinitializer @@ -1669,13 +1575,11 @@ define @fadd_nxv2f64_y( %x, %y, %n) { ; CHECK-LABEL: fadd_nxv2f64_y: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0 ; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d -; CHECK-NEXT: fadd z0.d, z0.d, z1.d +; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = fcmp ugt %n, zeroinitializer @@ -1737,10 +1641,9 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0 -; CHECK-NEXT: fmov z2.s, #1.00000000 ; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s -; CHECK-NEXT: fmul z0.s, z0.s, z1.s +; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z0.s +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = fcmp ugt %n, zeroinitializer @@ -1754,10 +1657,9 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0 -; CHECK-NEXT: fmov z2.h, #1.00000000 ; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: sel z0.h, p0, z0.h, z2.h -; CHECK-NEXT: fmul z0.h, z0.h, z1.h +; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z0.h +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = fcmp ugt %n, zeroinitializer @@ -1771,10 +1673,9 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0 -; CHECK-NEXT: fmov z2.d, #1.00000000 ; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d -; CHECK-NEXT: fmul z0.d, z0.d, z1.d +; CHECK-NEXT: fmul z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret entry: %c = fcmp ugt %n, zeroinitializer