Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8302,6 +8302,44 @@
     }
   }
 
+  // If this extload is directly stored, and that store's input value was
+  // loaded before still, try to avoid the roundtrip through memory and
+  // extload directly from the original memory location.
+  //
+  // This can't be caught by the store-to-load forwarding above, because
+  // the extload's memory type might not be legal.
+  // Instead, try to match this:
+  //     v4i32 load %ptr2, zext from v4i8
+  //       store i32 %value, %ptr2
+  //         %value = load i32 %ptr1
+  // to turn the extload into:
+  //     v4i32 load %ptr1, zext from v4i8
+  if (!ISD::isNON_EXTLoad(N) && !LD->isVolatile() &&
+      ISD::isNON_TRUNCStore(Chain.getNode())) {
+    SDLoc DL(N);
+    StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
+    EVT ValVT = PrevST->getValue().getValueType();
+    EVT MemVT = LD->getMemoryVT();
+    if (PrevST->getBasePtr() == Ptr &&
+        (ValVT.getStoreSize() == MemVT.getStoreSize() &&
+         ValVT.getSizeInBits() == MemVT.getSizeInBits()) &&
+        ISD::isNormalLoad(PrevST->getValue().getNode())) {
+      LoadSDNode *PPrevLD = cast<LoadSDNode>(PrevST->getValue());
+      if (LD->getAddressSpace() == PPrevLD->getAddressSpace() &&
+          LD->getAlignment() == PPrevLD->getAlignment()) {
+        SDValue NewLoad = DAG.getExtLoad(
+            LD->getExtensionType(), DL, LD->getValueType(0),
+            PPrevLD->getChain(), PPrevLD->getBasePtr(),
+            PPrevLD->getPointerInfo(), MemVT, /*isVolatile=*/false,
+            PPrevLD->isNonTemporal(), PPrevLD->isInvariant(),
+            PPrevLD->getAlignment(), PPrevLD->getAAInfo());
+        SDValue Token = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain,
+                                    SDValue(NewLoad.getNode(), 1));
+        return CombineTo(N, NewLoad, Token, true);
+      }
+    }
+  }
+
   // Try to infer better alignment information than the load already has.
   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
Index: test/CodeGen/X86/widen_load-2.ll
===================================================================
--- test/CodeGen/X86/widen_load-2.ll
+++ test/CodeGen/X86/widen_load-2.ll
@@ -193,9 +193,7 @@
 ; CHECK-NEXT: movd %[[CONSTANT1]], %e[[R1:[abcd]]]x
 ; CHECK-NEXT: movw %[[R1]]x, (%[[PTR1:.*]])
 ; CHECK-NEXT: movb $1, 2(%[[PTR1]])
-; CHECK-NEXT: movl (%[[PTR0]]), [[TMP1:%e[abcd]+x]]
-; CHECK-NEXT: movl [[TMP1]], [[TMP2:.*]]
-; CHECK-NEXT: pmovzxbd [[TMP2]], %[[X0:xmm[0-9]+]]
+; CHECK-NEXT: pmovzxbd (%[[PTR0]]), %[[X0:xmm[0-9]+]]
 ; CHECK-NEXT: pextrd $1, %[[X0]], %e[[R0:[abcd]]]x
 ; CHECK-NEXT: shrl %e[[R0]]x
 ; CHECK-NEXT: movd %[[X0]], %e[[R1:[abcd]]]x