diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12223,10 +12223,16 @@
   // Use of operator[] on the DenseMap may cause an insertion, which invalidates
   // the iterator, hence the need to make a copy to prevent a use-after-free.
-  NodeExtraInfo Copy = I->second;
-  if (LLVM_LIKELY(!Copy.PCSections)) {
+  NodeExtraInfo NEI = I->second;
+  if (LLVM_LIKELY(!NEI.PCSections)) {
     // No deep copy required for the types of extra info set.
-    SDEI[To] = std::move(Copy);
+    //
+    // FIXME: Investigate if other types of extra info also need deep copy. This
+    // depends on the types of nodes they can be attached to: if some extra info
+    // is only ever attached to nodes where a replacement To node is always the
+    // node where later use and propagation of the extra info has the intended
+    // semantics, no deep copy is required.
+    SDEI[To] = std::move(NEI);
     return;
   }
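The comment at the top of this hunk explains why NodeExtraInfo is copied before SDEI[To] is evaluated. As a minimal standalone illustration of that copy-before-insert pattern, here is a sketch using std::unordered_map as a stand-in for llvm::DenseMap (both can invalidate iterators when an insertion grows the table); the int key and string payload are hypothetical simplifications, not the real types.

// Simplified model of the first step of SelectionDAG::copyExtraInfo; names
// mirror the patch, but the types here are hypothetical stand-ins.
#include <string>
#include <unordered_map>
#include <utility>

struct NodeExtraInfo {
  std::string PCSections; // Stand-in payload; the real member differs.
};

std::unordered_map<int, NodeExtraInfo> SDEI; // Keyed by node id for brevity.

void copyExtraInfo(int From, int To) {
  auto I = SDEI.find(From);
  if (I == SDEI.end())
    return;
  // Copy the mapped value before evaluating SDEI[To]: operator[] may insert,
  // and an insertion can rehash or regrow the table, invalidating I. Reading
  // I->second after that point would be a use-after-free.
  NodeExtraInfo NEI = I->second;
  SDEI[To] = std::move(NEI);
}

The copy (rather than holding a reference into the map across the insertion) is load-bearing; llvm::DenseMap moves its buckets on growth just as std::unordered_map may on rehash.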
@@ -12239,41 +12245,68 @@
   // In the first step pre-populate the visited set with the nodes reachable
   // from the old From node. This avoids copying NodeExtraInfo to parts of the
   // DAG that are not new and should be left untouched.
-  DenseSet<const SDNode *> Visited;
-  auto VisitFrom = [&Visited](auto &&Self, const SDNode *N, int Depth) {
-    constexpr int MaxDepth = 16;
-    if (Depth >= MaxDepth)
+  SmallVector<const SDNode *> Leafs{From}; // Leafs reachable with VisitFrom.
+  DenseSet<const SDNode *> FromReach; // The set of nodes reachable from From.
+  auto VisitFrom = [&](auto &&Self, const SDNode *N, int MaxDepth) {
+    if (MaxDepth == 0) {
+      // Remember this node in case we need to increase MaxDepth and continue
+      // populating FromReach from this node.
+      Leafs.emplace_back(N);
       return;
-    if (!Visited.insert(N).second)
+    }
+    if (!FromReach.insert(N).second)
       return;
     for (const SDValue &Op : N->op_values())
-      Self(Self, Op.getNode(), Depth + 1);
+      Self(Self, Op.getNode(), MaxDepth - 1);
   };
-  VisitFrom(VisitFrom, From, 0);
 
   // Copy extra info to To and all its transitive operands (that are new).
-  auto DeepCopyTo = [this, &Copy, &Visited](auto &&Self, SDNode *To) {
-    if (!Visited.insert(To).second)
+  SmallPtrSet<const SDNode *, 8> Visited;
+  auto DeepCopyTo = [&](auto &&Self, const SDNode *N) {
+    if (FromReach.contains(N))
+      return true;
+    if (!Visited.insert(N).second)
       return true;
-    if (getEntryNode().getNode() == To) {
-      // This should not happen - and if it did, that means From has a depth
-      // greater or equal to MaxDepth, and VisitFrom() could not visit all
-      // common operands. As a result, we're able to reach the entry node.
-      assert(false && "Too complex 'From' node - increase MaxDepth?");
+    if (getEntryNode().getNode() == N)
       return false;
-    }
-    for (const SDValue &Op : To->op_values()) {
+    for (const SDValue &Op : N->op_values()) {
       if (!Self(Self, Op.getNode()))
         return false;
     }
-    SDEI[To] = Copy;
+    // Copy only if entry node was not reached.
+    SDEI[N] = NEI;
     return true;
   };
-  if (LLVM_UNLIKELY(!DeepCopyTo(DeepCopyTo, To))) {
-    // Fallback - see assert above.
-    SDEI[To] = std::move(Copy);
-  }
+  // We first try with a lower MaxDepth, assuming that the path to common
+  // operands between From and To is relatively short. This significantly
+  // improves performance in the common case. The initial MaxDepth is big
+  // enough to avoid retry in the common case; the last MaxDepth is large
+  // enough to avoid having to use the fallback below (and protects from
+  // potential stack exhaustion from recursion).
+  for (int PrevDepth = 0, MaxDepth = 16; MaxDepth <= 1024;
+       PrevDepth = MaxDepth, MaxDepth *= 2, Visited.clear()) {
+    // StartFrom is the previous (or initial) set of leafs reachable at the
+    // previous maximum depth.
+    SmallVector<const SDNode *> StartFrom;
+    std::swap(StartFrom, Leafs);
+    for (const SDNode *N : StartFrom)
+      VisitFrom(VisitFrom, N, MaxDepth - PrevDepth);
+    if (LLVM_LIKELY(DeepCopyTo(DeepCopyTo, To)))
+      return;
+    // This should happen very rarely (reached the entry node).
+    LLVM_DEBUG(dbgs() << __func__ << ": MaxDepth=" << MaxDepth << " too low\n");
+    assert(!Leafs.empty());
+  }
+
+  // This should not happen - but if it did, that means the subgraph reachable
+  // from From has depth greater or equal to maximum MaxDepth, and VisitFrom()
+  // could not visit all reachable common operands. Consequently, we were able
+  // to reach the entry node.
+  errs() << "warning: incomplete propagation of SelectionDAG::NodeExtraInfo\n";
+  assert(false && "From subgraph too complex - increase max. MaxDepth?");
+  // Best-effort fallback if assertions disabled.
+  SDEI[To] = std::move(NEI);
 }
 
 #ifndef NDEBUG
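The loop added above implements iterative deepening: instead of a single fixed-depth traversal, the reachability set is grown in rounds of doubling depth, resuming from the leaves recorded by the previous round. Below is a minimal, self-contained sketch of the same technique over a generic DAG; the Node type and the TryCopy callback are hypothetical simplifications of SDNode and DeepCopyTo, not the LLVM classes.

#include <unordered_set>
#include <utility>
#include <vector>

struct Node {
  std::vector<Node *> Operands; // Edges toward operands, as in an SDNode.
};

// Depth-budgeted DFS: nodes where the budget runs out are recorded in Leafs
// so that a later round with a larger budget can continue from them.
void visitFrom(std::unordered_set<const Node *> &Reach,
               std::vector<const Node *> &Leafs, const Node *N, int Budget) {
  if (Budget == 0) {
    Leafs.push_back(N); // Frontier node; resume here next round.
    return;
  }
  if (!Reach.insert(N).second)
    return;
  for (const Node *Op : N->Operands)
    visitFrom(Reach, Leafs, Op, Budget - 1);
}

// TryCopy plays the role of DeepCopyTo: it returns false if the reachability
// set was still too shallow, in which case the budget is doubled and the
// traversal resumed from the recorded frontier.
template <typename TryCopyFn>
bool copyWithDeepening(const Node *From, TryCopyFn TryCopy) {
  std::unordered_set<const Node *> Reach;
  std::vector<const Node *> Leafs{From};
  for (int Prev = 0, Max = 16; Max <= 1024; Prev = Max, Max *= 2) {
    std::vector<const Node *> Start;
    std::swap(Start, Leafs); // Resume only from last round's frontier.
    for (const Node *N : Start)
      visitFrom(Reach, Leafs, N, Max - Prev); // Spend only the new budget.
    if (TryCopy(Reach))
      return true;
  }
  return false; // Caller falls back, mirroring the warning path above.
}

Doubling from 16 to 1024 gives at most seven rounds, and because each round spends only the newly granted depth starting from the previous frontier, total work stays proportional to the part of the graph actually reached rather than being redone from scratch on every retry.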
diff --git a/llvm/test/CodeGen/X86/pcsections-memops.ll b/llvm/test/CodeGen/X86/pcsections-memops.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pcsections-memops.ll
@@ -0,0 +1,244 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 < %s | FileCheck %s --check-prefixes=O0
+; RUN: llc -O1 < %s | FileCheck %s --check-prefixes=OPT
+; RUN: llc -O2 < %s | FileCheck %s --check-prefixes=OPT
+; RUN: llc -O3 < %s | FileCheck %s --check-prefixes=OPT
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@__llvm_gcov_ctr.89 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.109 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.112 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.113 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.154 = internal global [1 x i64] zeroinitializer
+@__llvm_gcov_ctr.155 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.156 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.157 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.160 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.161 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.189 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.1109 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.1112 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.1113 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.1154 = internal global [1 x i64] zeroinitializer
+@__llvm_gcov_ctr.1155 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.1156 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.1157 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.1160 = internal global [2 x i64] zeroinitializer
+@__llvm_gcov_ctr.1161 = internal global [2 x i64] zeroinitializer
+
+define dso_local void @independent_load_stores() {
+; O0-LABEL: independent_load_stores:
+; O0:       # %bb.0:
+; O0-NEXT:    .Lpcsection0:
+; O0-NEXT:    movq __llvm_gcov_ctr.109, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection1:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.109
+; O0-NEXT:    .Lpcsection2:
+; O0-NEXT:    movq __llvm_gcov_ctr.112, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection3:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.112
+; O0-NEXT:    .Lpcsection4:
+; O0-NEXT:    movq __llvm_gcov_ctr.113, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection5:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.113
+; O0-NEXT:    .Lpcsection6:
+; O0-NEXT:    movq __llvm_gcov_ctr.154, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection7:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.154
+; O0-NEXT:    .Lpcsection8:
+; O0-NEXT:    movq __llvm_gcov_ctr.155, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection9:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.155
+; O0-NEXT:    .Lpcsection10:
+; O0-NEXT:    movq __llvm_gcov_ctr.89, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection11:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.89
+; O0-NEXT:    .Lpcsection12:
+; O0-NEXT:    movq __llvm_gcov_ctr.160, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection13:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.160
+; O0-NEXT:    .Lpcsection14:
+; O0-NEXT:    movq __llvm_gcov_ctr.156, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection15:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.156
+; O0-NEXT:    .Lpcsection16:
+; O0-NEXT:    movq __llvm_gcov_ctr.157, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection17:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.157
+; O0-NEXT:    .Lpcsection18:
+; O0-NEXT:    movq __llvm_gcov_ctr.161, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection19:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.161
+; O0-NEXT:    .Lpcsection20:
+; O0-NEXT:    movq __llvm_gcov_ctr.1109, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection21:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.1109
+; O0-NEXT:    .Lpcsection22:
+; O0-NEXT:    movq __llvm_gcov_ctr.1112, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection23:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.1112
+; O0-NEXT:    .Lpcsection24:
+; O0-NEXT:    movq __llvm_gcov_ctr.1113, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection25:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.1113
+; O0-NEXT:    .Lpcsection26:
+; O0-NEXT:    movq __llvm_gcov_ctr.1154, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection27:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.1154
+; O0-NEXT:    .Lpcsection28:
+; O0-NEXT:    movq __llvm_gcov_ctr.1155, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection29:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.1155
+; O0-NEXT:    .Lpcsection30:
+; O0-NEXT:    movq __llvm_gcov_ctr.189, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection31:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.189
+; O0-NEXT:    .Lpcsection32:
+; O0-NEXT:    movq __llvm_gcov_ctr.1160, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection33:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.1160
+; O0-NEXT:    .Lpcsection34:
+; O0-NEXT:    movq __llvm_gcov_ctr.1156, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection35:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.1156
+; O0-NEXT:    .Lpcsection36:
+; O0-NEXT:    movq __llvm_gcov_ctr.1157, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection37:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.1157
+; O0-NEXT:    .Lpcsection38:
+; O0-NEXT:    movq __llvm_gcov_ctr.1161, %rax
+; O0-NEXT:    addq $1, %rax
+; O0-NEXT:    .Lpcsection39:
+; O0-NEXT:    movq %rax, __llvm_gcov_ctr.1161
+; O0-NEXT:    retq
+;
+; OPT-LABEL: independent_load_stores:
+; OPT:       # %bb.0:
+; OPT-NEXT:    .Lpcsection0:
+; OPT-NEXT:    incq __llvm_gcov_ctr.109(%rip)
+; OPT-NEXT:    .Lpcsection1:
+; OPT-NEXT:    incq __llvm_gcov_ctr.112(%rip)
+; OPT-NEXT:    .Lpcsection2:
+; OPT-NEXT:    incq __llvm_gcov_ctr.113(%rip)
+; OPT-NEXT:    .Lpcsection3:
+; OPT-NEXT:    incq __llvm_gcov_ctr.154(%rip)
+; OPT-NEXT:    .Lpcsection4:
+; OPT-NEXT:    incq __llvm_gcov_ctr.155(%rip)
+; OPT-NEXT:    .Lpcsection5:
+; OPT-NEXT:    incq __llvm_gcov_ctr.89(%rip)
+; OPT-NEXT:    .Lpcsection6:
+; OPT-NEXT:    incq __llvm_gcov_ctr.160(%rip)
+; OPT-NEXT:    .Lpcsection7:
+; OPT-NEXT:    incq __llvm_gcov_ctr.156(%rip)
+; OPT-NEXT:    .Lpcsection8:
+; OPT-NEXT:    incq __llvm_gcov_ctr.157(%rip)
+; OPT-NEXT:    .Lpcsection9:
+; OPT-NEXT:    incq __llvm_gcov_ctr.161(%rip)
+; OPT-NEXT:    .Lpcsection10:
+; OPT-NEXT:    incq __llvm_gcov_ctr.1109(%rip)
+; OPT-NEXT:    .Lpcsection11:
+; OPT-NEXT:    incq __llvm_gcov_ctr.1112(%rip)
+; OPT-NEXT:    .Lpcsection12:
+; OPT-NEXT:    incq __llvm_gcov_ctr.1113(%rip)
+; OPT-NEXT:    .Lpcsection13:
+; OPT-NEXT:    incq __llvm_gcov_ctr.1154(%rip)
+; OPT-NEXT:    .Lpcsection14:
+; OPT-NEXT:    incq __llvm_gcov_ctr.1155(%rip)
+; OPT-NEXT:    .Lpcsection15:
+; OPT-NEXT:    incq __llvm_gcov_ctr.189(%rip)
+; OPT-NEXT:    .Lpcsection16:
+; OPT-NEXT:    incq __llvm_gcov_ctr.1160(%rip)
+; OPT-NEXT:    .Lpcsection17:
+; OPT-NEXT:    incq __llvm_gcov_ctr.1156(%rip)
+; OPT-NEXT:    .Lpcsection18:
+; OPT-NEXT:    incq __llvm_gcov_ctr.1157(%rip)
+; OPT-NEXT:    .Lpcsection19:
+; OPT-NEXT:    incq __llvm_gcov_ctr.1161(%rip)
+; OPT-NEXT:    retq
+  %1 = load i64, ptr @__llvm_gcov_ctr.109, align 8, !pcsections !0
+  %2 = add i64 %1, 1
+  store i64 %2, ptr @__llvm_gcov_ctr.109, align 8, !pcsections !0
+  %3 = load i64, ptr @__llvm_gcov_ctr.112, align 8, !pcsections !0
+  %4 = add i64 %3, 1
+  store i64 %4, ptr @__llvm_gcov_ctr.112, align 8, !pcsections !0
+  %5 = load i64, ptr @__llvm_gcov_ctr.113, align 8, !pcsections !0
+  %6 = add i64 %5, 1
+  store i64 %6, ptr @__llvm_gcov_ctr.113, align 8, !pcsections !0
+  %7 = load i64, ptr @__llvm_gcov_ctr.154, align 8, !pcsections !0
+  %8 = add i64 %7, 1
+  store i64 %8, ptr @__llvm_gcov_ctr.154, align 8, !pcsections !0
+  %9 = load i64, ptr @__llvm_gcov_ctr.155, align 8, !pcsections !0
+  %10 = add i64 %9, 1
+  store i64 %10, ptr @__llvm_gcov_ctr.155, align 8, !pcsections !0
+  %11 = load i64, ptr @__llvm_gcov_ctr.89, align 8, !pcsections !0
+  %12 = add i64 %11, 1
+  store i64 %12, ptr @__llvm_gcov_ctr.89, align 8, !pcsections !0
+  %13 = load i64, ptr @__llvm_gcov_ctr.160, align 8, !pcsections !0
+  %14 = add i64 %13, 1
+  store i64 %14, ptr @__llvm_gcov_ctr.160, align 8, !pcsections !0
+  %15 = load i64, ptr @__llvm_gcov_ctr.156, align 8, !pcsections !0
+  %16 = add i64 %15, 1
+  store i64 %16, ptr @__llvm_gcov_ctr.156, align 8, !pcsections !0
+  %17 = load i64, ptr @__llvm_gcov_ctr.157, align 8, !pcsections !0
+  %18 = add i64 %17, 1
+  store i64 %18, ptr @__llvm_gcov_ctr.157, align 8, !pcsections !0
+  %19 = load i64, ptr @__llvm_gcov_ctr.161, align 8, !pcsections !0
+  %20 = add i64 %19, 1
+  store i64 %20, ptr @__llvm_gcov_ctr.161, align 8, !pcsections !0
+
+  %21 = load i64, ptr @__llvm_gcov_ctr.1109, align 8, !pcsections !0
+  %22 = add i64 %21, 1
+  store i64 %22, ptr @__llvm_gcov_ctr.1109, align 8, !pcsections !0
+  %23 = load i64, ptr @__llvm_gcov_ctr.1112, align 8, !pcsections !0
+  %24 = add i64 %23, 1
+  store i64 %24, ptr @__llvm_gcov_ctr.1112, align 8, !pcsections !0
+  %25 = load i64, ptr @__llvm_gcov_ctr.1113, align 8, !pcsections !0
+  %26 = add i64 %25, 1
+  store i64 %26, ptr @__llvm_gcov_ctr.1113, align 8, !pcsections !0
+  %27 = load i64, ptr @__llvm_gcov_ctr.1154, align 8, !pcsections !0
+  %28 = add i64 %27, 1
+  store i64 %28, ptr @__llvm_gcov_ctr.1154, align 8, !pcsections !0
+  %29 = load i64, ptr @__llvm_gcov_ctr.1155, align 8, !pcsections !0
+  %30 = add i64 %29, 1
+  store i64 %30, ptr @__llvm_gcov_ctr.1155, align 8, !pcsections !0
+  %31 = load i64, ptr @__llvm_gcov_ctr.189, align 8, !pcsections !0
+  %32 = add i64 %31, 1
+  store i64 %32, ptr @__llvm_gcov_ctr.189, align 8, !pcsections !0
+  %33 = load i64, ptr @__llvm_gcov_ctr.1160, align 8, !pcsections !0
+  %34 = add i64 %33, 1
+  store i64 %34, ptr @__llvm_gcov_ctr.1160, align 8, !pcsections !0
+  %35 = load i64, ptr @__llvm_gcov_ctr.1156, align 8, !pcsections !0
+  %36 = add i64 %35, 1
+  store i64 %36, ptr @__llvm_gcov_ctr.1156, align 8, !pcsections !0
+  %37 = load i64, ptr @__llvm_gcov_ctr.1157, align 8, !pcsections !0
+  %38 = add i64 %37, 1
+  store i64 %38, ptr @__llvm_gcov_ctr.1157, align 8, !pcsections !0
+  %39 = load i64, ptr @__llvm_gcov_ctr.1161, align 8, !pcsections !0
+  %40 = add i64 %39, 1
+  store i64 %40, ptr @__llvm_gcov_ctr.1161, align 8, !pcsections !0
+
+  ret void
+}
+
+!0 = !{!"foo"}