diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17340,6 +17340,35 @@
                      {InverseSetCC, SelectB, SelectA});
 }
 
+// vselect(Pg, mload(Pg, zeroinit), zeroinit) -> mload(Pg, zeroinit)
+// A masked load with an all-zeros passthru already produces zero in every
+// lane its mask leaves inactive, so selecting it against zero under the same
+// mask is a no-op. Both the mask equality and the zero false-operand must be
+// checked; otherwise lanes chosen from the false operand would be replaced by
+// loaded (or zeroed) lanes.
+static SDValue tryFoldVSelectMaskedLoad(SDNode *N, SelectionDAG &DAG) {
+  SDValue Mask = N->getOperand(0);
+  SDValue TrueVal = N->getOperand(1);
+  SDValue FalseVal = N->getOperand(2);
+
+  if (TrueVal.getOpcode() != ISD::MLOAD)
+    return SDValue();
+
+  // MLOAD operands are (chain, base, offset, mask, passthru).
+  if (TrueVal.getOperand(3) != Mask)
+    return SDValue();
+
+  auto IsZeroSplat = [](SDValue V) {
+    APInt SplatValue;
+    return ISD::isConstantSplatVector(V.getNode(), SplatValue) &&
+           SplatValue.isZero();
+  };
+  if (!IsZeroSplat(TrueVal.getOperand(4)) || !IsZeroSplat(FalseVal))
+    return SDValue();
+
+  return TrueVal;
+}
+
 // vselect (v1i1 setcc) ->
 //     vselect (v1iXX setcc)  (XX is the size of the compared operand type)
 // FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
@@ -17348,6 +17377,8 @@
 static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
   if (auto SwapResult = trySwapVSelectOperands(N, DAG))
     return SwapResult;
+  if (auto FoldMaskedLoadResult = tryFoldVSelectMaskedLoad(N, DAG))
+    return FoldMaskedLoadResult;
 
   SDValue N0 = N->getOperand(0);
   EVT CCVT = N0.getValueType();
diff --git a/llvm/test/CodeGen/AArch64/sve-select.ll b/llvm/test/CodeGen/AArch64/sve-select.ll
--- a/llvm/test/CodeGen/AArch64/sve-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-select.ll
@@ -650,3 +650,20 @@
   %sel = select <4 x i1> %p, <4 x float> %a, <4 x float> %fmul
   ret <4 x float> %sel
 }
+
+define <vscale x 4 x i32> @fold_vselect_masked_load_zero(i32* %ptr) {
+; CHECK-LABEL: fold_vselect_masked_load_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s, vl16
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
+  %p = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 9)
+  %vscaleptr = bitcast i32* %ptr to <vscale x 4 x i32>*
+  %load = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %vscaleptr, i32 1, <vscale x 4 x i1> %p, <vscale x 4 x i32> zeroinitializer)
+  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %load, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x i32> %sel
+}
+
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 immarg)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sel.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
\ No newline at end of file