Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5283,6 +5283,31 @@
       return N1;
     if (ISD::isBuildVectorAllOnes(N1.getNode()))
       return N0;
+
+    // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
+    MaskedLoadSDNode *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
+    BuildVectorSDNode *BVec = dyn_cast<BuildVectorSDNode>(N1);
+    if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
+        N0.hasOneUse() && N1.hasOneUse()) {
+      EVT LoadVT = MLoad->getMemoryVT();
+      EVT ExtVT = VT;
+      if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
+        // For this AND to be a zero extension of the masked load, the splat
+        // constant of the BuildVec must be a mask of exactly the bottom
+        // LoadVT-element-size bits of the extended element type.
+        if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
+          uint64_t ElementSize =
+              LoadVT.getVectorElementType().getScalarSizeInBits();
+          if (Splat->getAPIntValue().isMask(ElementSize)) {
+            return DAG.getMaskedLoad(
+                ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
+                MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
+                LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
+                ISD::ZEXTLOAD, MLoad->isExpandingLoad());
+          }
+        }
+      }
+    }
   }
 
   // fold (and c1, c2) -> c1&c2
Index: llvm/test/CodeGen/Thumb2/mve-zext-masked-load.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-zext-masked-load.ll
+++ llvm/test/CodeGen/Thumb2/mve-zext-masked-load.ll
@@ -7,7 +7,6 @@
 ; CHECK-NEXT: vmovlb.s16 q0, q0
 ; CHECK-NEXT: vpt.s32 lt, q0, zr
 ; CHECK-NEXT: vldrht.u32 q0, [r0]
-; CHECK-NEXT: vmovlb.u16 q0, q0
 ; CHECK-NEXT: vcvt.f32.u32 q0, q0
 ; CHECK-NEXT: bx lr
 entry:
@@ -23,7 +22,6 @@
 ; CHECK-NEXT: vmovlb.s8 q0, q0
 ; CHECK-NEXT: vpt.s16 lt, q0, zr
 ; CHECK-NEXT: vldrbt.u16 q0, [r0]
-; CHECK-NEXT: vmovlb.u8 q0, q0
 ; CHECK-NEXT: vcvt.f16.u16 q0, q0
 ; CHECK-NEXT: bx lr
 entry:
@@ -37,11 +35,9 @@
 ; CHECK-LABEL: foo_v4i8:
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vmovlb.s8 q0, q0
-; CHECK-NEXT: vmov.i32 q1, #0xff
 ; CHECK-NEXT: vmovlb.s16 q0, q0
 ; CHECK-NEXT: vpt.s32 lt, q0, zr
 ; CHECK-NEXT: vldrbt.u32 q0, [r0]
-; CHECK-NEXT: vand q0, q0, q1
 ; CHECK-NEXT: vcvt.f32.u32 q0, q0
 ; CHECK-NEXT: bx lr
 entry: