diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -384,6 +384,10 @@ SDValue replaceStoreOfFPConstant(StoreSDNode *ST); SDValue visitSTORE(SDNode *N); + + SDValue ImproveLifetimeNodeChain(SDNode *N); + + SDValue visitLIFETIME_START(SDNode *N); SDValue visitLIFETIME_END(SDNode *N); SDValue visitINSERT_VECTOR_ELT(SDNode *N); SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); @@ -1592,6 +1596,8 @@ case ISD::MLOAD: return visitMLOAD(N); case ISD::MSCATTER: return visitMSCATTER(N); case ISD::MSTORE: return visitMSTORE(N); + case ISD::LIFETIME_START: + return visitLIFETIME_START(N); case ISD::LIFETIME_END: return visitLIFETIME_END(N); case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); @@ -15569,7 +15575,32 @@ return ReduceLoadOpStoreWidth(N); } +SDValue DAGCombiner::ImproveLifetimeNodeChain(SDNode *N) { + auto Chain = N->getOperand(0); + auto NewChain = FindBetterChain(N, Chain); + if (NewChain != Chain) { + SDNode *N2 = DAG.UpdateNodeOperands(N, NewChain, N->getOperand(1)); + // Make sure users of new N still depend on Chain + auto TF = DAG.getNode(ISD::TokenFactor, SDLoc(N2), MVT::Other, Chain, + SDValue(N2, 0)); + DAG.ReplaceAllUsesOfValueWith(SDValue(N2, 0), TF); + AddToWorklist(DAG.UpdateNodeOperands(TF.getNode(), Chain, SDValue(N2, 0))); + AddToWorklist(N2); + return SDValue(N, 0); + } + return SDValue(); +} + +SDValue DAGCombiner::visitLIFETIME_START(SDNode *N) { + if (SDValue V = ImproveLifetimeNodeChain(N)) + return V; + return SDValue(); +} + SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { + if (SDValue V = ImproveLifetimeNodeChain(N)) + return V; + const auto *LifetimeEnd = cast(N); if (!LifetimeEnd->hasOffset()) return SDValue(); @@ -15580,12 +15611,10 @@ // We walk up the chains to find stores. SmallVector Chains = {N->getOperand(0)}; while (!Chains.empty()) { - SDValue Chain = Chains.back(); - Chains.pop_back(); - if (!Chain.hasOneUse()) - continue; + SDValue Chain = Chains.pop_back_val(); switch (Chain.getOpcode()) { case ISD::TokenFactor: + AddToWorklist(Chain.getNode()); for (unsigned Nops = Chain.getNumOperands(); Nops;) Chains.push_back(Chain.getOperand(--Nops)); break; @@ -15609,6 +15638,7 @@ dbgs() << "\nwithin LIFETIME_END of : "; LifetimeEndBase.dump(); dbgs() << "\n"); CombineTo(ST, ST->getChain()); + AddToWorklist(N); return SDValue(N, 0); } } diff --git a/llvm/test/CodeGen/X86/swap.ll b/llvm/test/CodeGen/X86/swap.ll --- a/llvm/test/CodeGen/X86/swap.ll +++ b/llvm/test/CodeGen/X86/swap.ll @@ -87,14 +87,12 @@ ; NOAA-LABEL: twoallocs: ; NOAA: # %bb.0: # %entry ; NOAA-NEXT: vmovups (%rdi), %xmm0 -; NOAA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; NOAA-NEXT: vmovups %xmm0, (%rsi) ; NOAA-NEXT: retq ; ; AA-LABEL: twoallocs: ; AA: # %bb.0: # %entry ; AA-NEXT: vmovups (%rdi), %xmm0 -; AA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AA-NEXT: vmovups %xmm0, (%rsi) ; AA-NEXT: retq entry: @@ -115,20 +113,13 @@ define dso_local void @onealloc_readback_1(i8* nocapture %a, i8* nocapture %b) local_unnamed_addr { ; NOAA-LABEL: onealloc_readback_1: ; NOAA: # %bb.0: # %entry -; NOAA-NEXT: vmovups (%rdi), %xmm0 -; NOAA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; NOAA-NEXT: vmovups (%rsi), %xmm0 -; NOAA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) -; NOAA-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 ; NOAA-NEXT: vmovups %xmm0, (%rdi) ; NOAA-NEXT: retq ; ; AA-LABEL: onealloc_readback_1: ; AA: # %bb.0: # %entry -; AA-NEXT: vmovups (%rdi), %xmm0 -; AA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AA-NEXT: vmovups (%rsi), %xmm0 -; AA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AA-NEXT: vmovups %xmm0, (%rdi) ; AA-NEXT: retq entry: @@ -148,20 +139,13 @@ define dso_local void @onealloc_readback_2(i8* nocapture %a, i8* nocapture %b) local_unnamed_addr { ; NOAA-LABEL: onealloc_readback_2: ; NOAA: # %bb.0: # %entry -; NOAA-NEXT: vmovups (%rdi), %xmm0 -; NOAA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; NOAA-NEXT: vmovups (%rsi), %xmm0 -; NOAA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) -; NOAA-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 ; NOAA-NEXT: vmovups %xmm0, (%rdi) ; NOAA-NEXT: retq ; ; AA-LABEL: onealloc_readback_2: ; AA: # %bb.0: # %entry -; AA-NEXT: vmovups (%rdi), %xmm0 -; AA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AA-NEXT: vmovups (%rsi), %xmm0 -; AA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AA-NEXT: vmovups %xmm0, (%rdi) ; AA-NEXT: retq entry: