Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14509,13 +14509,16 @@
   SDValue SetCC = N->getOperand(0);
   EVT VT = SetCC.getValueType();
 
-  if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
+  if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1 ||
+      !isNullConstant(N->getOperand(1)))
     return SDValue();
 
   // Restricted the DAG combine to only cases where we're extracting from a
-  // flag-setting operation
-  auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
-  if (!Idx || !Idx->isZero() || SetCC.getOpcode() != ISD::SETCC)
+  // flag-setting operation.
+  if ((SetCC.getOpcode() != ISD::SETCC) &&
+      !(SetCC.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+        (SetCC.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
+         SetCC.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels)))
     return SDValue();
 
   // Extracts of lane 0 for SVE can be expressed as PTEST(Op, FIRST) ? 1 : 0
Index: llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
+++ llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
@@ -1,34 +1,57 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-unknown -mattr=+sve -o - < %s | FileCheck %s
-
-define i1 @foo_first(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
-; CHECK-LABEL: foo_first:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p0, p1.b
-; CHECK-NEXT:    cset w0, mi
-; CHECK-NEXT:    ret
-  %vcond = fcmp oeq <vscale x 4 x float> %a, %b
-  %bit = extractelement <vscale x 4 x i1> %vcond, i64 0
-  ret i1 %bit
-}
-
-define i1 @foo_last(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
-; CHECK-LABEL: foo_last:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p0, p1.b
-; CHECK-NEXT:    cset w0, lo
-; CHECK-NEXT:    ret
-  %vcond = fcmp oeq <vscale x 4 x float> %a, %b
-  %vscale = call i64 @llvm.vscale.i64()
-  %shl2 = shl nuw nsw i64 %vscale, 2
-  %idx = add nuw nsw i64 %shl2, -1
-  %bit = extractelement <vscale x 4 x i1> %vcond, i64 %idx
-  ret i1 %bit
-}
-
-
-declare i64 @llvm.vscale.i64()
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-unknown -mattr=+sve -o - < %s | FileCheck %s
+
+define i1 @foo_first(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: foo_first:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cset w0, mi
+; CHECK-NEXT:    ret
+  %vcond = fcmp oeq <vscale x 4 x float> %a, %b
+  %bit = extractelement <vscale x 4 x i1> %vcond, i64 0
+  ret i1 %bit
+}
+
+define i1 @foo_last(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: foo_last:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    ret
+  %vcond = fcmp oeq <vscale x 4 x float> %a, %b
+  %vscale = call i64 @llvm.vscale.i64()
+  %shl2 = shl nuw nsw i64 %vscale, 2
+  %idx = add nuw nsw i64 %shl2, -1
+  %bit = extractelement <vscale x 4 x i1> %vcond, i64 %idx
+  ret i1 %bit
+}
+
+define i1 @whilelo_first(i64 %next, i64 %end) {
+; CHECK-LABEL: whilelo_first:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo p0.s, x0, x1
+; CHECK-NEXT:    cset w0, mi
+; CHECK-NEXT:    ret
+  %predicate = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64 %next, i64 %end)
+  %bit = extractelement <vscale x 4 x i1> %predicate, i64 0
+  ret i1 %bit
+}
+
+define i1 @whilels_first(i64 %next, i64 %end) {
+; CHECK-LABEL: whilels_first:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilels p0.s, x0, x1
+; CHECK-NEXT:    cset w0, mi
+; CHECK-NEXT:    ret
+  %predicate = call <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i64(i64 %next, i64 %end)
+  %bit = extractelement <vscale x 4 x i1> %predicate, i64 0
+  ret i1 %bit
+}
+
+declare i64 @llvm.vscale.i64()
+declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64, i64)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i64(i64, i64)
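Note: the motivating shape for accepting whilelo/whilels here is the latch of a
scalable-vector loop, where the lane-0 extract of the next iteration's predicate
feeds the back-edge branch. A minimal sketch of that pattern follows (the
function and value names are illustrative only, not part of this patch):

  declare i64 @llvm.vscale.i64()
  declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64, i64)

  define void @while_loop_sketch(i64 %n) {
  entry:
    %vscale = call i64 @llvm.vscale.i64()
    %step = shl i64 %vscale, 2            ; 32-bit lanes processed per iteration
    %p.entry = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64 0, i64 %n)
    br label %loop

  loop:
    %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
    %p = phi <vscale x 4 x i1> [ %p.entry, %entry ], [ %p.next, %loop ]
    ; ... loop body predicated on %p ...
    %i.next = add i64 %i, %step
    %p.next = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64 %i.next, i64 %n)
    ; lane 0 of the next predicate decides whether any work remains
    %continue = extractelement <vscale x 4 x i1> %p.next, i64 0
    br i1 %continue, label %loop, label %exit

  exit:
    ret void
  }

With the combine, the lane-0 extract is selected as a read of the first-active
(MI) condition already set by the WHILELO/WHILELS instruction itself, so the
latch needs no extra ptrue/ptest pair, as the whilelo_first and whilels_first
check lines above show.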