Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1247,7 +1247,6 @@
       setLoadExtAction(Op, MVT::nxv2i64, MVT::nxv2i16, Legal);
       setLoadExtAction(Op, MVT::nxv2i64, MVT::nxv2i32, Legal);
       setLoadExtAction(Op, MVT::nxv4i32, MVT::nxv4i8, Legal);
-      setLoadExtAction(Op, MVT::nxv2i32, MVT::nxv2i16, Legal);
       setLoadExtAction(Op, MVT::nxv4i32, MVT::nxv4i16, Legal);
       setLoadExtAction(Op, MVT::nxv8i16, MVT::nxv8i8, Legal);
     }
@@ -14405,6 +14404,30 @@
 
   EVT MemVT;
 
+  // Zero/any extend of an MLOAD
+  if (const MaskedLoadSDNode *MLd = dyn_cast<MaskedLoadSDNode>(Src)) {
+    MemVT = MLd->getMemoryVT();
+    MVT::SimpleValueType Ty = MemVT.getSimpleVT().SimpleTy;
+    SDValue Dup = N->getOperand(1);
+    if (Dup.getOpcode() != AArch64ISD::DUP)
+      return SDValue();
+
+    SDLoc DL(N);
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
+    if (!C)
+      return SDValue();
+
+    uint64_t ExtVal = C->getZExtValue();
+
+    // If the mask is fully covered by the MLOAD, we don't need to push
+    // a new AND onto the operand
+    EVT EltTy = MemVT.getVectorElementType();
+    if ((ExtVal == 0xFF && EltTy == MVT::i8) ||
+        (ExtVal == 0xFFFF && EltTy == MVT::i16) ||
+        (ExtVal == 0xFFFFFFFF && EltTy == MVT::i32))
+      return Src;
+  }
+
   // SVE load instructions perform an implicit zero-extend, which makes them
   // perfect candidates for combining.
   switch (Opc) {
Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
@@ -97,7 +97,7 @@
 ; CHECK-LABEL: masked_zload_2i16_2f64:
 ; CHECK: ld1h { z0.d }, p0/z, [x0]
 ; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ucvtf z0.d, p0/m, z0.s
+; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
 ; CHECK-NEXT: ret
   %wide.load = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>* %in, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
   %zext = zext <vscale x 2 x i16> %wide.load to <vscale x 2 x i64>
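
Note on the combine above: by the time performSVEAndCombine sees the node, a zero-extend of an illegally typed masked load has typically been rewritten as an AND of the widened load with a DUP of a constant. The new check returns the masked load itself whenever that constant already matches the loaded element width (0xFF for i8, 0xFFFF for i16, 0xFFFFFFFF for i32), relying on the implicit zero-extension the SVE load performs. Below is a minimal IR sketch of the i8 case, written to the conventions of sve-masked-ldst-zext.ll; it is a hypothetical illustration, not part of this patch's tests, and the function and intrinsic names are assumed.

; Hypothetical example (not in the patch): zext of a masked i8 load into
; 64-bit containers; with the combine above this should select to a single
; zero-extending ld1b with no separate AND.
define <vscale x 2 x i64> @masked_zload_2i8_2i64(<vscale x 2 x i8>* %in, <vscale x 2 x i1> %mask) {
  %load = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>* %in, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %zext = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %zext
}

declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>*, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)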