Index: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h @@ -1222,8 +1222,9 @@ /// If an existing load has uses of its chain, create a token factor node with /// that chain and the new memory node's chain and update users of the old /// chain to the token factor. This ensures that the new memory node will have - /// the same relative memory dependency position as the old load. - void makeEquivalentMemoryOrdering(LoadSDNode *Old, SDValue New); + /// the same relative memory dependency position as the old load. Returns the + /// new merged load chain. + SDValue makeEquivalentMemoryOrdering(LoadSDNode *Old, SDValue New); /// Topological-sort the AllNodes list and a /// assign a unique node id for each node in the DAG based on their Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7339,22 +7339,23 @@ AddDbgValue(I, ToNode, false); } -void SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, - SDValue NewMemOp) { +SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, + SDValue NewMemOp) { assert(isa(NewMemOp.getNode()) && "Expected a memop node"); - if (!OldLoad->hasAnyUseOfValue(1)) - return; - // The new memory operation must have the same position as the old load in // terms of memory dependency. Create a TokenFactor for the old load and new // memory operation and update uses of the old load's output chain to use that // TokenFactor. 
SDValue OldChain = SDValue(OldLoad, 1); SDValue NewChain = SDValue(NewMemOp.getNode(), 1); + if (!OldLoad->hasAnyUseOfValue(1)) + return NewChain; + SDValue TokenFactor = getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain); ReplaceAllUsesOfValueWith(OldChain, TokenFactor); UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain); + return TokenFactor; } //===----------------------------------------------------------------------===// Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -33349,7 +33349,8 @@ SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->getAlignment(), Ld->getMemOperand()->getFlags()); - SDValue NewChain = NewLd.getValue(1); + // Make sure new load is placed in same chain order. + SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, NewLd); if (TokenFactorIndex >= 0) { Ops.push_back(NewChain); NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops); @@ -33370,11 +33371,12 @@ Ld->getPointerInfo().getWithOffset(4), MinAlign(Ld->getAlignment(), 4), Ld->getMemOperand()->getFlags()); + // Make sure new loads are placed in same chain order. 
+ SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, LoLd); + NewChain = DAG.makeEquivalentMemoryOrdering(Ld, HiLd); - SDValue NewChain = LoLd.getValue(1); if (TokenFactorIndex >= 0) { - Ops.push_back(LoLd); - Ops.push_back(HiLd); + Ops.push_back(NewChain); NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops); } Index: llvm/trunk/test/CodeGen/X86/pr34088.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr34088.ll +++ llvm/trunk/test/CodeGen/X86/pr34088.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown -mcpu=pentium4 | FileCheck %s + +%struct.Foo = type { i32, %struct.Bar } +%struct.Bar = type { i32, %struct.Buffer, i32 } +%struct.Buffer = type { i8*, i32 } + +; This test checks that the load feeding the store of %2 is not dropped. +; +define i32 @pr34088() local_unnamed_addr { +; CHECK-LABEL: pr34088: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .Lcfi0: +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .Lcfi1: +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: .Lcfi2: +; CHECK-NEXT: .cfi_def_cfa_register %ebp +; CHECK-NEXT: andl $-16, %esp +; CHECK-NEXT: subl $32, %esp +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: movaps {{.*#+}} xmm1 = [205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205] +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: movaps %xmm0, (%esp) +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movaps %xmm1, (%esp) +; CHECK-NEXT: movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD +; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %ebp, %esp +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl +entry: + %foo = alloca %struct.Foo, align 4 + %0 = bitcast %struct.Foo* %foo to i8* + call void @llvm.memset.p0i8.i32(i8* nonnull %0, i8 0, i32 20, i32 4, i1 false) + %buffer1 = getelementptr inbounds %struct.Foo, %struct.Foo* 
%foo, i32 0, i32 1, i32 1 + %1 = bitcast %struct.Buffer* %buffer1 to i64* + %2 = load i64, i64* %1, align 4 + call void @llvm.memset.p0i8.i32(i8* nonnull %0, i8 -51, i32 20, i32 4, i1 false) + store i64 %2, i64* %1, align 4 + ret i32 0 +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1)