diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17529,9 +17529,32 @@
   return SDValue();
 }
 
+// Canonicalize a ZERO_EXTEND of a scalable i1 vector into a VSELECT so that
+// the zext can later be folded into a predicated add, sub or mul.
+static SDValue canonicalizeZeroExtendToVSelect(SDNode *N, SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::ZERO_EXTEND && "Expected ZERO_EXTEND");
+  SDValue Op = N->getOperand(0);
+
+  if (!Op->getValueType(0).isScalableVector() ||
+      Op->getValueType(0).getScalarType() != MVT::i1)
+    return SDValue();
+
+  SDLoc DL = SDLoc(N);
+  EVT VT = N->getValueType(0);
+  SDValue Ones = DAG.getConstant(1, DL, VT);
+  SDValue Zeros = DAG.getConstant(0, DL, VT);
+  return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0), Op, Ones,
+                     Zeros);
+}
+
 static SDValue performExtendCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     SelectionDAG &DAG) {
+  if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND) {
+    if (SDValue Value = canonicalizeZeroExtendToVSelect(N, DAG))
+      return Value;
+  }
+
   // If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
   // we can convert that DUP into another extract_high (of a bigger DUP), which
   // helps the backend to decide that an sabdl2 would be useful, saving a real
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -512,13 +512,13 @@
   defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", AArch64fneg_mt>;
 
   // zext(cmpeq(x, splat(0))) -> cnot(x)
-  def : Pat<(nxv16i8 (zext (nxv16i1 (AArch64setcc_z (nxv16i1 (SVEAllActive):$Pg), nxv16i8:$Op2, (SVEDup0), SETEQ)))),
+  def : Pat<(nxv16i8 (vselect (nxv16i1 (AArch64setcc_z (nxv16i1 (SVEAllActive):$Pg), nxv16i8:$Op2, (SVEDup0), SETEQ)), (nxv16i8 (splat_vector (i32 1))), (nxv16i8 (splat_vector (i32 0))))),
             (CNOT_ZPmZ_B $Op2, $Pg, $Op2)>;
-  def : Pat<(nxv8i16 (zext (nxv8i1 (AArch64setcc_z (nxv8i1 (SVEAllActive):$Pg), nxv8i16:$Op2, (SVEDup0), SETEQ)))),
+  def : Pat<(nxv8i16 (vselect (nxv8i1 (AArch64setcc_z (nxv8i1 (SVEAllActive):$Pg), nxv8i16:$Op2, (SVEDup0), SETEQ)), (nxv8i16 (splat_vector (i32 1))), (nxv8i16 (splat_vector (i32 0))))),
             (CNOT_ZPmZ_H $Op2, $Pg, $Op2)>;
-  def : Pat<(nxv4i32 (zext (nxv4i1 (AArch64setcc_z (nxv4i1 (SVEAllActive):$Pg), nxv4i32:$Op2, (SVEDup0), SETEQ)))),
+  def : Pat<(nxv4i32 (vselect (nxv4i1 (AArch64setcc_z (nxv4i1 (SVEAllActive):$Pg), nxv4i32:$Op2, (SVEDup0), SETEQ)), (nxv4i32 (splat_vector (i32 1))), (nxv4i32 (splat_vector (i32 0))))),
             (CNOT_ZPmZ_S $Op2, $Pg, $Op2)>;
-  def : Pat<(nxv2i64 (zext (nxv2i1 (AArch64setcc_z (nxv2i1 (SVEAllActive):$Pg), nxv2i64:$Op2, (SVEDup0), SETEQ)))),
+  def : Pat<(nxv2i64 (vselect (nxv2i1 (AArch64setcc_z (nxv2i1 (SVEAllActive):$Pg), nxv2i64:$Op2, (SVEDup0), SETEQ)), (nxv2i64 (splat_vector (i64 1))), (nxv2i64 (splat_vector (i64 0))))),
             (CNOT_ZPmZ_D $Op2, $Pg, $Op2)>;
 
   defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", AArch64smax_m1, DestructiveBinaryComm>;
diff --git a/llvm/test/CodeGen/AArch64/predicated-add-sub-mul.ll b/llvm/test/CodeGen/AArch64/predicated-add-sub-mul.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/predicated-add-sub-mul.ll
@@ -0,0 +1,446 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux"
+
+define <vscale x 2 x i32> @zext.add2(<vscale x 2 x i32> %a0, <vscale x 2 x i32> %a1) #0 {
+; CHECK-LABEL: zext.add2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, z0.d
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
+; CHECK-NEXT:    and z2.d, z2.d, #0xffffffff
+; CHECK-NEXT:    cmphi p0.d, p0/z, z2.d, z1.d
+; CHECK-NEXT:    mov z1.d, #1 // =0x1
+; CHECK-NEXT:    add z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 2 x i32> %a0, %a1
+  %zero.extend = zext <vscale x 2 x i1> %v to <vscale x 2 x i32>
+  %result = add <vscale x 2 x i32> %zero.extend, %a0
+  ret <vscale x 2 x i32> %result
+}
+
+define <vscale x 4 x i32> @zext.add4(<vscale x 4 x i32> %a0, <vscale x 4 x i32> %a1) #0 {
+; CHECK-LABEL: zext.add4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    mov z1.s, #1 // =0x1
+; CHECK-NEXT:    add z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 4 x i32> %a0, %a1
+  %zero.extend = zext <vscale x 4 x i1> %v to <vscale x 4 x i32>
+  %result = add <vscale x 4 x i32> %zero.extend, %a0
+  ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 8 x i32> @zext.add8(<vscale x 8 x i32> %a0, <vscale x 8 x i32> %a1) #0 {
+; CHECK-LABEL: zext.add8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p1.s, p0/z, z1.s, z3.s
+; CHECK-NEXT:    cmphi p0.s, p0/z, z0.s, z2.s
+; CHECK-NEXT:    mov z2.s, #1 // =0x1
+; CHECK-NEXT:    add z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT:    add z1.s, p1/m, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 8 x i32> %a0, %a1
+  %zero.extend = zext <vscale x 8 x i1> %v to <vscale x 8 x i32>
+  %result = add <vscale x 8 x i32> %zero.extend, %a0
+  ret <vscale x 8 x i32> %result
+}
+
+define <vscale x 16 x i32> @zext.add16(<vscale x 16 x i32> %a0, <vscale x 16 x i32> %a1) #0 {
+; CHECK-LABEL: zext.add16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p1.s, p0/z, z3.s, z7.s
+; CHECK-NEXT:    cmphi p2.s, p0/z, z2.s, z6.s
+; CHECK-NEXT:    cmphi p3.s, p0/z, z1.s, z5.s
+; CHECK-NEXT:    cmphi p0.s, p0/z, z0.s, z4.s
+; CHECK-NEXT:    mov z4.s, #1 // =0x1
+; CHECK-NEXT:    add z0.s, p0/m, z0.s, z4.s
+; CHECK-NEXT:    add z1.s, p3/m, z1.s, z4.s
+; CHECK-NEXT:    add z2.s, p2/m, z2.s, z4.s
+; CHECK-NEXT:    add z3.s, p1/m, z3.s, z4.s
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 16 x i32> %a0, %a1
+  %zero.extend = zext <vscale x 16 x i1> %v to <vscale x 16 x i32>
+  %result = add <vscale x 16 x i32> %zero.extend, %a0
+  ret <vscale x 16 x i32> %result
+}
+
+define <vscale x 2 x i32> @zext.sub2(<vscale x 2 x i32> %a0, <vscale x 2 x i32> %a1) #0 {
+; CHECK-LABEL: zext.sub2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, z0.d
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
+; CHECK-NEXT:    and z2.d, z2.d, #0xffffffff
+; CHECK-NEXT:    cmphi p0.d, p0/z, z2.d, z1.d
+; CHECK-NEXT:    mov z1.d, p0/z, #1 // =0x1
+; CHECK-NEXT:    sub z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 2 x i32> %a0, %a1
+  %zero.extend = zext <vscale x 2 x i1> %v to <vscale x 2 x i32>
+  %result = sub <vscale x 2 x i32> %zero.extend, %a0
+  ret <vscale x 2 x i32> %result
+}
+
+define <vscale x 4 x i32> @zext.sub4(<vscale x 4 x i32> %a0, <vscale x 4 x i32> %a1) #0 {
+; CHECK-LABEL: zext.sub4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    mov z1.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    sub z0.s, z1.s, z0.s
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 4 x i32> %a0, %a1
+  %zero.extend = zext <vscale x 4 x i1> %v to <vscale x 4 x i32>
+  %result = sub <vscale x 4 x i32> %zero.extend, %a0
+  ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 8 x i32> @zext.sub8(<vscale x 8 x i32> %a0, <vscale x 8 x i32> %a1) #0 {
+; CHECK-LABEL: zext.sub8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT:    cmphi p0.s, p0/z, z1.s, z3.s
+; CHECK-NEXT:    mov z2.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    mov z3.s, p1/z, #1 // =0x1
+; CHECK-NEXT:    sub z0.s, z3.s, z0.s
+; CHECK-NEXT:    sub z1.s, z2.s, z1.s
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 8 x i32> %a0, %a1
+  %zero.extend = zext <vscale x 8 x i1> %v to <vscale x 8 x i32>
+  %result = sub <vscale x 8 x i32> %zero.extend, %a0
+  ret <vscale x 8 x i32> %result
+}
+
+define <vscale x 16 x i32> @zext.sub16(<vscale x 16 x i32> %a0, <vscale x 16 x i32> %a1) #0 {
+; CHECK-LABEL: zext.sub16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p1.s, p0/z, z2.s, z6.s
+; CHECK-NEXT:    cmphi p2.s, p0/z, z0.s, z4.s
+; CHECK-NEXT:    cmphi p3.s, p0/z, z1.s, z5.s
+; CHECK-NEXT:    cmphi p0.s, p0/z, z3.s, z7.s
+; CHECK-NEXT:    mov z4.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    mov z5.s, p1/z, #1 // =0x1
+; CHECK-NEXT:    mov z6.s, p3/z, #1 // =0x1
+; CHECK-NEXT:    mov z7.s, p2/z, #1 // =0x1
+; CHECK-NEXT:    sub z0.s, z7.s, z0.s
+; CHECK-NEXT:    sub z1.s, z6.s, z1.s
+; CHECK-NEXT:    sub z2.s, z5.s, z2.s
+; CHECK-NEXT:    sub z3.s, z4.s, z3.s
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 16 x i32> %a0, %a1
+  %zero.extend = zext <vscale x 16 x i1> %v to <vscale x 16 x i32>
+  %result = sub <vscale x 16 x i32> %zero.extend, %a0
+  ret <vscale x 16 x i32> %result
+}
+
+define <vscale x 2 x i32> @zext.mul2(<vscale x 2 x i32> %a0, <vscale x 2 x i32> %a1) #0 {
+; CHECK-LABEL: zext.mul2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, z0.d
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
+; CHECK-NEXT:    and z2.d, z2.d, #0xffffffff
+; CHECK-NEXT:    cmphi p1.d, p0/z, z2.d, z1.d
+; CHECK-NEXT:    mov z1.d, p1/z, #1 // =0x1
+; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 2 x i32> %a0, %a1
+  %zero.extend = zext <vscale x 2 x i1> %v to <vscale x 2 x i32>
+  %result = mul <vscale x 2 x i32> %zero.extend, %a0
+  ret <vscale x 2 x i32> %result
+}
+
+define <vscale x 4 x i32> @zext.mul4(<vscale x 4 x i32> %a0, <vscale x 4 x i32> %a1) #0 {
+; CHECK-LABEL: zext.mul4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    mov z1.s, p1/z, #1 // =0x1
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 4 x i32> %a0, %a1
+  %zero.extend = zext <vscale x 4 x i1> %v to <vscale x 4 x i32>
+  %result = mul <vscale x 4 x i32> %zero.extend, %a0
+  ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 8 x i32> @zext.mul8(<vscale x 8 x i32> %a0, <vscale x 8 x i32> %a1) #0 {
+; CHECK-LABEL: zext.mul8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT:    cmphi p2.s, p0/z, z1.s, z3.s
+; CHECK-NEXT:    mov z2.s, p2/z, #1 // =0x1
+; CHECK-NEXT:    mov z3.s, p1/z, #1 // =0x1
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z3.s
+; CHECK-NEXT:    mul z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 8 x i32> %a0, %a1
+  %zero.extend = zext <vscale x 8 x i1> %v to <vscale x 8 x i32>
+  %result = mul <vscale x 8 x i32> %zero.extend, %a0
+  ret <vscale x 8 x i32> %result
+}
+
+define <vscale x 16 x i32> @zext.mul16(<vscale x 16 x i32> %a0, <vscale x 16 x i32> %a1) #0 {
+; CHECK-LABEL: zext.mul16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    cmphi p4.s, p0/z, z3.s, z7.s
+; CHECK-NEXT:    cmphi p2.s, p0/z, z0.s, z4.s
+; CHECK-NEXT:    mov z4.s, p4/z, #1 // =0x1
+; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    cmphi p1.s, p0/z, z2.s, z6.s
+; CHECK-NEXT:    cmphi p3.s, p0/z, z1.s, z5.s
+; CHECK-NEXT:    mov z5.s, p1/z, #1 // =0x1
+; CHECK-NEXT:    mov z6.s, p3/z, #1 // =0x1
+; CHECK-NEXT:    mov z7.s, p2/z, #1 // =0x1
+; CHECK-NEXT:    mul z1.s, p0/m, z1.s, z6.s
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z7.s
+; CHECK-NEXT:    mul z2.s, p0/m, z2.s, z5.s
+; CHECK-NEXT:    mul z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 16 x i32> %a0, %a1
+  %zero.extend = zext <vscale x 16 x i1> %v to <vscale x 16 x i32>
+  %result = mul <vscale x 16 x i32> %zero.extend, %a0
+  ret <vscale x 16 x i32> %result
+}
+
+define <vscale x 2 x i32> @sext.add2(<vscale x 2 x i32> %a0, <vscale x 2 x i32> %a1) #0 {
+; CHECK-LABEL: sext.add2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, z0.d
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
+; CHECK-NEXT:    and z2.d, z2.d, #0xffffffff
+; CHECK-NEXT:    cmphi p0.d, p0/z, z2.d, z1.d
+; CHECK-NEXT:    mov z1.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    add z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 2 x i32> %a0, %a1
+  %zero.extend = sext <vscale x 2 x i1> %v to <vscale x 2 x i32>
+  %result = add <vscale x 2 x i32> %zero.extend, %a0
+  ret <vscale x 2 x i32> %result
+}
+
+define <vscale x 4 x i32> @sext.add4(<vscale x 4 x i32> %a0, <vscale x 4 x i32> %a1) #0 {
+; CHECK-LABEL: sext.add4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    mov z1.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    add z0.s, z1.s, z0.s
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 4 x i32> %a0, %a1
+  %zero.extend = sext <vscale x 4 x i1> %v to <vscale x 4 x i32>
+  %result = add <vscale x 4 x i32> %zero.extend, %a0
+  ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 8 x i32> @sext.add8(<vscale x 8 x i32> %a0, <vscale x 8 x i32> %a1) #0 {
+; CHECK-LABEL: sext.add8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT:    cmphi p0.s, p0/z, z1.s, z3.s
+; CHECK-NEXT:    mov z2.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z3.s, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    add z0.s, z3.s, z0.s
+; CHECK-NEXT:    add z1.s, z2.s, z1.s
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 8 x i32> %a0, %a1
+  %zero.extend = sext <vscale x 8 x i1> %v to <vscale x 8 x i32>
+  %result = add <vscale x 8 x i32> %zero.extend, %a0
+  ret <vscale x 8 x i32> %result
+}
+
+define <vscale x 16 x i32> @sext.add16(<vscale x 16 x i32> %a0, <vscale x 16 x i32> %a1) #0 {
+; CHECK-LABEL: sext.add16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p1.s, p0/z, z2.s, z6.s
+; CHECK-NEXT:    cmphi p2.s, p0/z, z0.s, z4.s
+; CHECK-NEXT:    cmphi p3.s, p0/z, z1.s, z5.s
+; CHECK-NEXT:    cmphi p0.s, p0/z, z3.s, z7.s
+; CHECK-NEXT:    mov z4.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z5.s, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z6.s, p3/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z7.s, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    add z0.s, z7.s, z0.s
+; CHECK-NEXT:    add z1.s, z6.s, z1.s
+; CHECK-NEXT:    add z2.s, z5.s, z2.s
+; CHECK-NEXT:    add z3.s, z4.s, z3.s
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 16 x i32> %a0, %a1
+  %zero.extend = sext <vscale x 16 x i1> %v to <vscale x 16 x i32>
+  %result = add <vscale x 16 x i32> %zero.extend, %a0
+  ret <vscale x 16 x i32> %result
+}
+
+define <vscale x 2 x i32> @sext.sub2(<vscale x 2 x i32> %a0, <vscale x 2 x i32> %a1) #0 {
+; CHECK-LABEL: sext.sub2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, z0.d
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
+; CHECK-NEXT:    and z2.d, z2.d, #0xffffffff
+; CHECK-NEXT:    cmphi p0.d, p0/z, z2.d, z1.d
+; CHECK-NEXT:    mov z1.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sub z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 2 x i32> %a0, %a1
+  %zero.extend = sext <vscale x 2 x i1> %v to <vscale x 2 x i32>
+  %result = sub <vscale x 2 x i32> %zero.extend, %a0
+  ret <vscale x 2 x i32> %result
+}
+
+define <vscale x 4 x i32> @sext.sub4(<vscale x 4 x i32> %a0, <vscale x 4 x i32> %a1) #0 {
+; CHECK-LABEL: sext.sub4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    mov z1.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sub z0.s, z1.s, z0.s
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 4 x i32> %a0, %a1
+  %zero.extend = sext <vscale x 4 x i1> %v to <vscale x 4 x i32>
+  %result = sub <vscale x 4 x i32> %zero.extend, %a0
+  ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 8 x i32> @sext.sub8(<vscale x 8 x i32> %a0, <vscale x 8 x i32> %a1) #0 {
+; CHECK-LABEL: sext.sub8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT:    cmphi p0.s, p0/z, z1.s, z3.s
+; CHECK-NEXT:    mov z2.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z3.s, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sub z0.s, z3.s, z0.s
+; CHECK-NEXT:    sub z1.s, z2.s, z1.s
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 8 x i32> %a0, %a1
+  %zero.extend = sext <vscale x 8 x i1> %v to <vscale x 8 x i32>
+  %result = sub <vscale x 8 x i32> %zero.extend, %a0
+  ret <vscale x 8 x i32> %result
+}
+
+define <vscale x 16 x i32> @sext.sub16(<vscale x 16 x i32> %a0, <vscale x 16 x i32> %a1) #0 {
+; CHECK-LABEL: sext.sub16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p1.s, p0/z, z2.s, z6.s
+; CHECK-NEXT:    cmphi p2.s, p0/z, z0.s, z4.s
+; CHECK-NEXT:    cmphi p3.s, p0/z, z1.s, z5.s
+; CHECK-NEXT:    cmphi p0.s, p0/z, z3.s, z7.s
+; CHECK-NEXT:    mov z4.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z5.s, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z6.s, p3/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z7.s, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sub z0.s, z7.s, z0.s
+; CHECK-NEXT:    sub z1.s, z6.s, z1.s
+; CHECK-NEXT:    sub z2.s, z5.s, z2.s
+; CHECK-NEXT:    sub z3.s, z4.s, z3.s
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 16 x i32> %a0, %a1
+  %zero.extend = sext <vscale x 16 x i1> %v to <vscale x 16 x i32>
+  %result = sub <vscale x 16 x i32> %zero.extend, %a0
+  ret <vscale x 16 x i32> %result
+}
+
+define <vscale x 2 x i32> @sext.mul2(<vscale x 2 x i32> %a0, <vscale x 2 x i32> %a1) #0 {
+; CHECK-LABEL: sext.mul2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, z0.d
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
+; CHECK-NEXT:    and z2.d, z2.d, #0xffffffff
+; CHECK-NEXT:    cmphi p1.d, p0/z, z2.d, z1.d
+; CHECK-NEXT:    mov z1.d, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 2 x i32> %a0, %a1
+  %zero.extend = sext <vscale x 2 x i1> %v to <vscale x 2 x i32>
+  %result = mul <vscale x 2 x i32> %zero.extend, %a0
+  ret <vscale x 2 x i32> %result
+}
+
+define <vscale x 4 x i32> @sext.mul4(<vscale x 4 x i32> %a0, <vscale x 4 x i32> %a1) #0 {
+; CHECK-LABEL: sext.mul4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    mov z1.s, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 4 x i32> %a0, %a1
+  %zero.extend = sext <vscale x 4 x i1> %v to <vscale x 4 x i32>
+  %result = mul <vscale x 4 x i32> %zero.extend, %a0
+  ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 8 x i32> @sext.mul8(<vscale x 8 x i32> %a0, <vscale x 8 x i32> %a1) #0 {
+; CHECK-LABEL: sext.mul8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmphi p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT:    cmphi p2.s, p0/z, z1.s, z3.s
+; CHECK-NEXT:    mov z2.s, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z3.s, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z3.s
+; CHECK-NEXT:    mul z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 8 x i32> %a0, %a1
+  %zero.extend = sext <vscale x 8 x i1> %v to <vscale x 8 x i32>
+  %result = mul <vscale x 8 x i32> %zero.extend, %a0
+  ret <vscale x 8 x i32> %result
+}
+
+define <vscale x 16 x i32> @sext.mul16(<vscale x 16 x i32> %a0, <vscale x 16 x i32> %a1) #0 {
+; CHECK-LABEL: sext.mul16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    cmphi p4.s, p0/z, z3.s, z7.s
+; CHECK-NEXT:    cmphi p2.s, p0/z, z0.s, z4.s
+; CHECK-NEXT:    mov z4.s, p4/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    cmphi p1.s, p0/z, z2.s, z6.s
+; CHECK-NEXT:    cmphi p3.s, p0/z, z1.s, z5.s
+; CHECK-NEXT:    mov z5.s, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z6.s, p3/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z7.s, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mul z1.s, p0/m, z1.s, z6.s
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z7.s
+; CHECK-NEXT:    mul z2.s, p0/m, z2.s, z5.s
+; CHECK-NEXT:    mul z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %v = icmp ugt <vscale x 16 x i32> %a0, %a1
+  %zero.extend = sext <vscale x 16 x i1> %v to <vscale x 16 x i32>
+  %result = mul <vscale x 16 x i32> %zero.extend, %a0
+  ret <vscale x 16 x i32> %result
+}
+
+attributes #0 = { "target-features"="+sve" }
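
Note: the fold can also be exercised outside the lit test; below is a minimal standalone sketch, assuming an llc build with the AArch64 backend. The @zext_add name is illustrative, the explicit -mtriple/-mattr flags stand in for the in-file target triple and the #0 "+sve" attribute used by the test, and the CHECK lines simply mirror the zext.add4 case above.

; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s
define <vscale x 4 x i32> @zext_add(<vscale x 4 x i32> %a0, <vscale x 4 x i32> %a1) {
; CHECK-LABEL: zext_add:
; CHECK: cmphi p0.s, p0/z, z0.s, z1.s
; CHECK: mov z1.s, #1
; CHECK: add z0.s, p0/m, z0.s, z1.s
  %cmp = icmp ugt <vscale x 4 x i32> %a0, %a1
  ; The combine canonicalizes this zext of the i1 predicate into
  ; vselect(%cmp, splat(1), splat(0)), which lowers to the predicated ADD
  ; checked above.
  %ext = zext <vscale x 4 x i1> %cmp to <vscale x 4 x i32>
  %res = add <vscale x 4 x i32> %ext, %a0
  ret <vscale x 4 x i32> %res
}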