Index: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1513,12 +1513,15 @@
                                           SDValue &Scale, SDValue &Index,
                                           SDValue &Disp, SDValue &Segment,
                                           SDValue &PatternNodeWithChain) {
-  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+  // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
+  // once. Otherwise the load might get duplicated and the chain output of the
+  // duplicate load will not be observed by all dependencies.
+  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
     PatternNodeWithChain = N.getOperand(0);
     if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
         PatternNodeWithChain.hasOneUse() &&
-        IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
-        IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
+        IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
+        IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
       LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
       if (!selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
         return false;
Index: llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll
+++ llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -381,33 +381,37 @@
 ; X32-LABEL: double_fold:
 ; X32:       ## BB#0: ## %entry
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movaps %xmm0, %xmm1
-; X32-NEXT:    minss (%eax), %xmm1
-; X32-NEXT:    maxss (%eax), %xmm0
-; X32-NEXT:    addps %xmm1, %xmm0
+; X32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT:    movaps %xmm0, %xmm2
+; X32-NEXT:    minss %xmm1, %xmm2
+; X32-NEXT:    maxss %xmm1, %xmm0
+; X32-NEXT:    addps %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: double_fold:
 ; X64:       ## BB#0: ## %entry
-; X64-NEXT:    movaps %xmm0, %xmm1
-; X64-NEXT:    minss (%rdi), %xmm1
-; X64-NEXT:    maxss (%rdi), %xmm0
-; X64-NEXT:    addps %xmm1, %xmm0
+; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT:    movaps %xmm0, %xmm2
+; X64-NEXT:    minss %xmm1, %xmm2
+; X64-NEXT:    maxss %xmm1, %xmm0
+; X64-NEXT:    addps %xmm2, %xmm0
 ; X64-NEXT:    retq
 ;
 ; X32_AVX-LABEL: double_fold:
 ; X32_AVX:       ## BB#0: ## %entry
 ; X32_AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32_AVX-NEXT:    vminss (%eax), %xmm0, %xmm1
-; X32_AVX-NEXT:    vmaxss (%eax), %xmm0, %xmm0
-; X32_AVX-NEXT:    vaddps %xmm0, %xmm1, %xmm0
+; X32_AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32_AVX-NEXT:    vminss %xmm1, %xmm0, %xmm2
+; X32_AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
+; X32_AVX-NEXT:    vaddps %xmm0, %xmm2, %xmm0
 ; X32_AVX-NEXT:    retl
 ;
 ; X64_AVX-LABEL: double_fold:
 ; X64_AVX:       ## BB#0: ## %entry
-; X64_AVX-NEXT:    vminss (%rdi), %xmm0, %xmm1
-; X64_AVX-NEXT:    vmaxss (%rdi), %xmm0, %xmm0
-; X64_AVX-NEXT:    vaddps %xmm0, %xmm1, %xmm0
+; X64_AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64_AVX-NEXT:    vminss %xmm1, %xmm0, %xmm2
+; X64_AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
+; X64_AVX-NEXT:    vaddps %xmm0, %xmm2, %xmm0
 ; X64_AVX-NEXT:    retq
 entry:
   %0 = load float, float* %x, align 1