diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6241,8 +6241,8 @@ // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0); ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true); - if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && N0.hasOneUse() && - Splat && N1.hasOneUse()) { + if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat && + N1.hasOneUse()) { EVT LoadVT = MLoad->getMemoryVT(); EVT ExtVT = VT; if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) { @@ -6252,11 +6252,16 @@ uint64_t ElementSize = LoadVT.getVectorElementType().getScalarSizeInBits(); if (Splat->getAPIntValue().isMask(ElementSize)) { - return DAG.getMaskedLoad( + auto NewLoad = DAG.getMaskedLoad( ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(), MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(), LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(), ISD::ZEXTLOAD, MLoad->isExpandingLoad()); + bool LoadHasOtherUsers = !N0.hasOneUse(); + CombineTo(N, NewLoad); + if (LoadHasOtherUsers) + CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1)); + return SDValue(N, 0); } } } diff --git a/llvm/test/CodeGen/AArch64/sve-load-compare-store.ll b/llvm/test/CodeGen/AArch64/sve-load-compare-store.ll --- a/llvm/test/CodeGen/AArch64/sve-load-compare-store.ll +++ b/llvm/test/CodeGen/AArch64/sve-load-compare-store.ll @@ -6,9 +6,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: and z1.s, z1.s, #0xffff -; CHECK-NEXT: cmphs p0.s, p0/z, z1.s, #0 +; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, #0 ; CHECK-NEXT: st1b { z0.s }, p0, [x1] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll --- 
a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll @@ -2079,7 +2079,7 @@ ; CHECK-LE-NEXT: vmov r0, r1, d8 ; CHECK-LE-NEXT: vmov r2, r3, d9 ; CHECK-LE-NEXT: bl foo -; CHECK-LE-NEXT: vmovlb.u16 q0, q4 +; CHECK-LE-NEXT: vmov q0, q4 ; CHECK-LE-NEXT: vpop {d8, d9} ; CHECK-LE-NEXT: pop {r7, pc} ; @@ -2091,13 +2091,12 @@ ; CHECK-BE-NEXT: vpush {d8, d9} ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: vpt.s32 gt, q1, zr -; CHECK-BE-NEXT: vldrht.u32 q4, [r0] -; CHECK-BE-NEXT: vrev64.32 q0, q4 -; CHECK-BE-NEXT: vmov r1, r0, d0 -; CHECK-BE-NEXT: vmov r3, r2, d1 +; CHECK-BE-NEXT: vldrht.u32 q0, [r0] +; CHECK-BE-NEXT: vrev64.32 q4, q0 +; CHECK-BE-NEXT: vmov r1, r0, d8 +; CHECK-BE-NEXT: vmov r3, r2, d9 ; CHECK-BE-NEXT: bl foo -; CHECK-BE-NEXT: vmovlb.u16 q1, q4 -; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: vmov q0, q4 ; CHECK-BE-NEXT: vpop {d8, d9} ; CHECK-BE-NEXT: pop {r7, pc} entry: