Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2223,24 +2223,32 @@
 
     if (foldedLoad) {
       SDValue Chain;
+      MachineSDNode *CNode = nullptr;
       SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                         InFlag };
       if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
         SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
-        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+        CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
         ResHi = SDValue(CNode, 0);
         ResLo = SDValue(CNode, 1);
         Chain = SDValue(CNode, 2);
         InFlag = SDValue(CNode, 3);
       } else {
         SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
-        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+        CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
         Chain = SDValue(CNode, 0);
         InFlag = SDValue(CNode, 1);
       }
 
       // Update the chain.
       ReplaceUses(N1.getValue(1), Chain);
+      // Record the mem-refs of the folded load on the new machine node.
+      LoadSDNode *LoadNode = cast<LoadSDNode>(N1);
+      if (LoadNode) {
+        MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+        MemOp[0] = LoadNode->getMemOperand();
+        CNode->setMemRefs(MemOp, MemOp + 1);
+      }
     } else {
       SDValue Ops[] = { N1, InFlag };
       if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
Index: test/CodeGen/X86/hoist-invariant-load.ll
===================================================================
--- test/CodeGen/X86/hoist-invariant-load.ll
+++ test/CodeGen/X86/hoist-invariant-load.ll
@@ -1,7 +1,10 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -stats -O2 2>&1 | grep "2 machine-licm"
+; RUN: llc -mcpu=haswell < %s -stats -O2 2>&1 | grep "4 machine-licm.*hoisted"
+; For test:
 ; 2 invariant loads, 1 for OBJC_SELECTOR_REFERENCES_
 ; and 1 for objc_msgSend from the GOT
+; For test_multi_def:
+; 2 invariant loads (full multiply, both loads should be hoisted.)
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.7.2"
@@ -29,4 +32,32 @@
 
 declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
 
+define void @test_multi_def(i64* dereferenceable(8) %x1,
+                            i64* dereferenceable(8) %x2,
+                            i128* %y, i64 %count) nounwind {
+entry:
+  br label %for.body
+
+for.check:
+  %inc = add nsw i64 %i, 1
+  %done = icmp sge i64 %inc, %count
+  br i1 %done, label %exit, label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %inc, %for.check ]
+  %x1_load = load i64, i64* %x1, align 8, !invariant.load !0
+  %x1_zext = zext i64 %x1_load to i128
+  %x2_load = load i64, i64* %x2, align 8, !invariant.load !0
+  %x2_zext = zext i64 %x2_load to i128
+  %x_prod = mul i128 %x1_zext, %x2_zext
+  %y_elem = getelementptr inbounds i128, i128* %y, i64 %i
+  %y_load = load i128, i128* %y_elem, align 8
+  %y_plus = add i128 %x_prod, %y_load
+  store i128 %y_plus, i128* %y_elem, align 8
+  br label %for.check
+
+exit:
+  ret void
+}
+
 !0 = !{}