Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -34474,28 +34474,10 @@
       isa<LoadSDNode>(St->getValue()) &&
       !cast<LoadSDNode>(St->getValue())->isVolatile() &&
      St->getChain().hasOneUse() && !St->isVolatile()) {
-    SDNode* LdVal = St->getValue().getNode();
-    LoadSDNode *Ld = nullptr;
-    int TokenFactorIndex = -1;
+    LoadSDNode *Ld = cast<LoadSDNode>(St->getValue().getNode());
     SmallVector<SDValue, 8> Ops;
-    SDNode* ChainVal = St->getChain().getNode();
-    // Must be a store of a load. We currently handle two cases: the load
-    // is a direct child, and it's under an intervening TokenFactor. It is
-    // possible to dig deeper under nested TokenFactors.
-    if (ChainVal == LdVal)
-      Ld = cast<LoadSDNode>(St->getChain());
-    else if (St->getValue().hasOneUse() &&
-             ChainVal->getOpcode() == ISD::TokenFactor) {
-      for (unsigned i = 0, e = ChainVal->getNumOperands(); i != e; ++i) {
-        if (ChainVal->getOperand(i).getNode() == LdVal) {
-          TokenFactorIndex = i;
-          Ld = cast<LoadSDNode>(St->getValue());
-        } else
-          Ops.push_back(ChainVal->getOperand(i));
-      }
-    }
-    if (!Ld || !ISD::isNormalLoad(Ld))
+    if (!ISD::isNormalLoad(Ld))
       return SDValue();
 
     // If this is not the MMX case, i.e. we are just turning i64 load/store
@@ -34512,17 +34494,12 @@
     if (Subtarget.is64Bit() || F64IsLegal) {
       MVT LdVT = Subtarget.is64Bit() ? MVT::i64 : MVT::f64;
       SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
-                                  Ld->getPointerInfo(), Ld->getAlignment(),
-                                  Ld->getMemOperand()->getFlags());
+                                  Ld->getMemOperand());
+
       // Make sure new load is placed in same chain order.
-      SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
-      if (TokenFactorIndex >= 0) {
-        Ops.push_back(NewChain);
-        NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
-      }
-      return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
-                          St->getPointerInfo(), St->getAlignment(),
-                          St->getMemOperand()->getFlags());
+      DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
+      return DAG.getStore(St->getChain(), StDL, NewLd, St->getBasePtr(),
+                          St->getMemOperand());
     }
 
     // Otherwise, lower to two pairs of 32-bit loads / stores.
@@ -34537,23 +34514,19 @@
                                MinAlign(Ld->getAlignment(), 4),
                                Ld->getMemOperand()->getFlags());
     // Make sure new loads are placed in same chain order.
-    SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, LoLd);
-    NewChain = DAG.makeEquivalentMemoryOrdering(Ld, HiLd);
-
-    if (TokenFactorIndex >= 0) {
-      Ops.push_back(NewChain);
-      NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
-    }
+    DAG.makeEquivalentMemoryOrdering(Ld, LoLd);
+    DAG.makeEquivalentMemoryOrdering(Ld, HiLd);
 
     LoAddr = St->getBasePtr();
     HiAddr = DAG.getMemBasePlusOffset(LoAddr, 4, StDL);
 
     SDValue LoSt =
-        DAG.getStore(NewChain, StDL, LoLd, LoAddr, St->getPointerInfo(),
+        DAG.getStore(St->getChain(), StDL, LoLd, LoAddr, St->getPointerInfo(),
                      St->getAlignment(), St->getMemOperand()->getFlags());
-    SDValue HiSt = DAG.getStore(
-        NewChain, StDL, HiLd, HiAddr, St->getPointerInfo().getWithOffset(4),
-        MinAlign(St->getAlignment(), 4), St->getMemOperand()->getFlags());
+    SDValue HiSt = DAG.getStore(St->getChain(), StDL, HiLd, HiAddr,
+                                St->getPointerInfo().getWithOffset(4),
+                                MinAlign(St->getAlignment(), 4),
+                                St->getMemOperand()->getFlags());
     return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
   }
Index: llvm/trunk/test/CodeGen/X86/nontemporal.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/nontemporal.ll
+++ llvm/trunk/test/CodeGen/X86/nontemporal.ll
@@ -9,45 +9,42 @@
 ; X32-SSE:       # %bb.0:
 ; X32-SSE-NEXT:    pushl %ebp
 ; X32-SSE-NEXT:    movl %esp, %ebp
-; X32-SSE-NEXT:    pushl %edi
 ; X32-SSE-NEXT:    pushl %esi
 ; X32-SSE-NEXT:    andl $-16, %esp
-; X32-SSE-NEXT:    movl 76(%ebp), %ecx
+; X32-SSE-NEXT:    subl $16, %esp
+; X32-SSE-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
 ; X32-SSE-NEXT:    movl 12(%ebp), %eax
-; X32-SSE-NEXT:    movdqa 56(%ebp), %xmm3
-; X32-SSE-NEXT:    movdqa 40(%ebp), %xmm4
-; X32-SSE-NEXT:    movdqa 24(%ebp), %xmm5
-; X32-SSE-NEXT:    movl 8(%ebp), %esi
-; X32-SSE-NEXT:    movl 80(%ebp), %edx
-; X32-SSE-NEXT:    movl (%edx), %edi
+; X32-SSE-NEXT:    movdqa 56(%ebp), %xmm4
+; X32-SSE-NEXT:    movdqa 40(%ebp), %xmm5
+; X32-SSE-NEXT:    movdqa 24(%ebp), %xmm6
+; X32-SSE-NEXT:    movl 8(%ebp), %edx
+; X32-SSE-NEXT:    movl 80(%ebp), %ecx
+; X32-SSE-NEXT:    movl (%ecx), %esi
 ; X32-SSE-NEXT:    addps {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    movntps %xmm0, (%esi)
+; X32-SSE-NEXT:    movntps %xmm0, (%edx)
 ; X32-SSE-NEXT:    paddq {{\.LCPI.*}}, %xmm2
-; X32-SSE-NEXT:    addl (%edx), %edi
-; X32-SSE-NEXT:    movntdq %xmm2, (%esi)
+; X32-SSE-NEXT:    addl (%ecx), %esi
+; X32-SSE-NEXT:    movntdq %xmm2, (%edx)
 ; X32-SSE-NEXT:    addpd {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT:    addl (%edx), %edi
-; X32-SSE-NEXT:    movntpd %xmm1, (%esi)
-; X32-SSE-NEXT:    paddd {{\.LCPI.*}}, %xmm5
-; X32-SSE-NEXT:    addl (%edx), %edi
-; X32-SSE-NEXT:    movntdq %xmm5, (%esi)
-; X32-SSE-NEXT:    paddw {{\.LCPI.*}}, %xmm4
-; X32-SSE-NEXT:    addl (%edx), %edi
-; X32-SSE-NEXT:    movntdq %xmm4, (%esi)
-; X32-SSE-NEXT:    paddb {{\.LCPI.*}}, %xmm3
-; X32-SSE-NEXT:    addl (%edx), %edi
-; X32-SSE-NEXT:    movntdq %xmm3, (%esi)
-; X32-SSE-NEXT:    addl (%edx), %edi
-; X32-SSE-NEXT:    movntil %eax, (%esi)
-; X32-SSE-NEXT:    movl (%edx), %eax
-; X32-SSE-NEXT:    movntil %ecx, 4(%esi)
-; X32-SSE-NEXT:    movl 72(%ebp), %ecx
-; X32-SSE-NEXT:    movntil %ecx, (%esi)
-; X32-SSE-NEXT:    addl %edi, %eax
-; X32-SSE-NEXT:    addl (%edx), %eax
-; X32-SSE-NEXT:    leal -8(%ebp), %esp
+; X32-SSE-NEXT:    addl (%ecx), %esi
+; X32-SSE-NEXT:    movntpd %xmm1, (%edx)
+; X32-SSE-NEXT:    paddd {{\.LCPI.*}}, %xmm6
+; X32-SSE-NEXT:    addl (%ecx), %esi
+; X32-SSE-NEXT:    movntdq %xmm6, (%edx)
+; X32-SSE-NEXT:    paddw {{\.LCPI.*}}, %xmm5
+; X32-SSE-NEXT:    addl (%ecx), %esi
+; X32-SSE-NEXT:    movntdq %xmm5, (%edx)
+; X32-SSE-NEXT:    paddb {{\.LCPI.*}}, %xmm4
+; X32-SSE-NEXT:    addl (%ecx), %esi
+; X32-SSE-NEXT:    movntdq %xmm4, (%edx)
+; X32-SSE-NEXT:    addl (%ecx), %esi
+; X32-SSE-NEXT:    movntil %eax, (%edx)
+; X32-SSE-NEXT:    movl (%ecx), %eax
+; X32-SSE-NEXT:    addl %esi, %eax
+; X32-SSE-NEXT:    movsd %xmm3, (%edx)
+; X32-SSE-NEXT:    addl (%ecx), %eax
+; X32-SSE-NEXT:    leal -4(%ebp), %esp
 ; X32-SSE-NEXT:    popl %esi
-; X32-SSE-NEXT:    popl %edi
 ; X32-SSE-NEXT:    popl %ebp
 ; X32-SSE-NEXT:    retl
 ;
@@ -55,45 +52,42 @@
 ; X32-AVX:       # %bb.0:
 ; X32-AVX-NEXT:    pushl %ebp
 ; X32-AVX-NEXT:    movl %esp, %ebp
-; X32-AVX-NEXT:    pushl %edi
 ; X32-AVX-NEXT:    pushl %esi
 ; X32-AVX-NEXT:    andl $-16, %esp
-; X32-AVX-NEXT:    movl 76(%ebp), %ecx
+; X32-AVX-NEXT:    subl $16, %esp
+; X32-AVX-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
 ; X32-AVX-NEXT:    movl 12(%ebp), %eax
-; X32-AVX-NEXT:    vmovdqa 56(%ebp), %xmm3
-; X32-AVX-NEXT:    vmovdqa 40(%ebp), %xmm4
-; X32-AVX-NEXT:    vmovdqa 24(%ebp), %xmm5
-; X32-AVX-NEXT:    movl 8(%ebp), %esi
+; X32-AVX-NEXT:    vmovdqa 56(%ebp), %xmm4
+; X32-AVX-NEXT:    vmovdqa 40(%ebp), %xmm5
+; X32-AVX-NEXT:    vmovdqa 24(%ebp), %xmm6
+; X32-AVX-NEXT:    movl 8(%ebp), %ecx
 ; X32-AVX-NEXT:    movl 80(%ebp), %edx
-; X32-AVX-NEXT:    movl (%edx), %edi
+; X32-AVX-NEXT:    movl (%edx), %esi
 ; X32-AVX-NEXT:    vaddps {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-AVX-NEXT:    vmovntps %xmm0, (%esi)
+; X32-AVX-NEXT:    vmovntps %xmm0, (%ecx)
 ; X32-AVX-NEXT:    vpaddq {{\.LCPI.*}}, %xmm2, %xmm0
-; X32-AVX-NEXT:    addl (%edx), %edi
-; X32-AVX-NEXT:    vmovntdq %xmm0, (%esi)
+; X32-AVX-NEXT:    addl (%edx), %esi
+; X32-AVX-NEXT:    vmovntdq %xmm0, (%ecx)
 ; X32-AVX-NEXT:    vaddpd {{\.LCPI.*}}, %xmm1, %xmm0
-; X32-AVX-NEXT:    addl (%edx), %edi
-; X32-AVX-NEXT:    vmovntpd %xmm0, (%esi)
-; X32-AVX-NEXT:    vpaddd {{\.LCPI.*}}, %xmm5, %xmm0
-; X32-AVX-NEXT:    addl (%edx), %edi
-; X32-AVX-NEXT:    vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT:    vpaddw {{\.LCPI.*}}, %xmm4, %xmm0
-; X32-AVX-NEXT:    addl (%edx), %edi
-; X32-AVX-NEXT:    vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT:    vpaddb {{\.LCPI.*}}, %xmm3, %xmm0
-; X32-AVX-NEXT:    addl (%edx), %edi
-; X32-AVX-NEXT:    vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT:    addl (%edx), %edi
-; X32-AVX-NEXT:    movntil %eax, (%esi)
+; X32-AVX-NEXT:    addl (%edx), %esi
+; X32-AVX-NEXT:    vmovntpd %xmm0, (%ecx)
+; X32-AVX-NEXT:    vpaddd {{\.LCPI.*}}, %xmm6, %xmm0
+; X32-AVX-NEXT:    addl (%edx), %esi
+; X32-AVX-NEXT:    vmovntdq %xmm0, (%ecx)
+; X32-AVX-NEXT:    vpaddw {{\.LCPI.*}}, %xmm5, %xmm0
+; X32-AVX-NEXT:    addl (%edx), %esi
+; X32-AVX-NEXT:    vmovntdq %xmm0, (%ecx)
+; X32-AVX-NEXT:    vpaddb {{\.LCPI.*}}, %xmm4, %xmm0
+; X32-AVX-NEXT:    addl (%edx), %esi
+; X32-AVX-NEXT:    vmovntdq %xmm0, (%ecx)
+; X32-AVX-NEXT:    addl (%edx), %esi
+; X32-AVX-NEXT:    movntil %eax, (%ecx)
 ; X32-AVX-NEXT:    movl (%edx), %eax
-; X32-AVX-NEXT:    movntil %ecx, 4(%esi)
-; X32-AVX-NEXT:    movl 72(%ebp), %ecx
-; X32-AVX-NEXT:    movntil %ecx, (%esi)
-; X32-AVX-NEXT:    addl %edi, %eax
+; X32-AVX-NEXT:    addl %esi, %eax
+; X32-AVX-NEXT:    vmovsd %xmm3, (%ecx)
 ; X32-AVX-NEXT:    addl (%edx), %eax
-; X32-AVX-NEXT:    leal -8(%ebp), %esp
+; X32-AVX-NEXT:    leal -4(%ebp), %esp
 ; X32-AVX-NEXT:    popl %esi
-; X32-AVX-NEXT:    popl %edi
 ; X32-AVX-NEXT:    popl %ebp
 ; X32-AVX-NEXT:    retl
 ;
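
The simplification above hinges on SelectionDAG::makeEquivalentMemoryOrdering(): it transfers every user of the original load's output chain onto a TokenFactor of the old and new chains, so the rewritten store can keep using St->getChain() directly and the hand-rolled TokenFactor bookkeeping (LdVal, ChainVal, TokenFactorIndex, Ops) becomes unnecessary. Below is a minimal sketch of the resulting fast path, assuming LLVM's SelectionDAG headers; the free-standing function name rewriteStoreOfLoad and the LdVT parameter are illustrative stand-ins, not the actual combineStore() entry point.

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Sketch: re-issue a store-of-load pair without rebuilding chains by hand.
static SDValue rewriteStoreOfLoad(StoreSDNode *St, SelectionDAG &DAG,
                                  MVT LdVT) {
  // The caller (illustrative) has already verified the stored value is a load.
  LoadSDNode *Ld = cast<LoadSDNode>(St->getValue().getNode());
  if (!ISD::isNormalLoad(Ld))
    return SDValue();

  SDLoc LdDL(Ld), StDL(St);
  // Reusing the load's MachineMemOperand carries pointer info, alignment,
  // and flags in a single argument instead of passing each piece separately.
  SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
                              Ld->getMemOperand());
  // Splice NewLd into the old load's chain: every user of Ld's chain is
  // re-parented onto a TokenFactor of both chains, preserving memory
  // ordering without tracking TokenFactor operand indices by hand.
  DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
  return DAG.getStore(St->getChain(), StDL, NewLd, St->getBasePtr(),
                      St->getMemOperand());
}

The same pattern covers the two-load/two-store fallback in the patch: call makeEquivalentMemoryOrdering() once per replacement load, then keep St->getChain() as the chain operand of both narrow stores.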