diff --git a/lldb/include/lldb/Symbol/Function.h b/lldb/include/lldb/Symbol/Function.h --- a/lldb/include/lldb/Symbol/Function.h +++ b/lldb/include/lldb/Symbol/Function.h @@ -266,6 +266,7 @@ /// in the call graph between two functions, or to evaluate DW_OP_entry_value. class CallEdge { public: + enum class AddrType : uint8_t { Call, AfterCall }; virtual ~CallEdge() {} /// Get the callee's definition. @@ -281,21 +282,32 @@ /// made the call. lldb::addr_t GetReturnPCAddress(Function &caller, Target &target) const; - /// Like \ref GetReturnPCAddress, but returns an unresolved file address. - lldb::addr_t GetUnresolvedReturnPCAddress() const { return return_pc; } + /// Return an address in the caller. This can either be the address of the + /// call instruction, or the address of the instruction after the call. + std::pair GetCallerAddress(Function &caller, + Target &target) const { + return {caller_address_type, + GetLoadAddress(caller_address, caller, target)}; + } - /// Get the load PC address of the call instruction (or LLDB_INVALID_ADDRESS). - lldb::addr_t GetCallInstPC(Function &caller, Target &target) const; + bool IsTailCall() const { return is_tail_call; } /// Get the call site parameters available at this call edge. llvm::ArrayRef GetCallSiteParameters() const { return parameters; } + /// Non-tail-calls go first, sorted by the return address. They are followed + /// by tail calls, which have no specific order. + std::pair GetSortKey() const { + return {is_tail_call, GetUnresolvedReturnPCAddress()}; + } + protected: - CallEdge(lldb::addr_t return_pc, lldb::addr_t call_inst_pc, - CallSiteParameterArray &¶meters) - : return_pc(return_pc), call_inst_pc(call_inst_pc), + CallEdge(AddrType caller_address_type, lldb::addr_t caller_address, + bool is_tail_call, CallSiteParameterArray &¶meters) + : caller_address(caller_address), + caller_address_type(caller_address_type), is_tail_call(is_tail_call), parameters(std::move(parameters)) {} /// Helper that finds the load address of \p unresolved_pc, a file address @@ -303,13 +315,17 @@ static lldb::addr_t GetLoadAddress(lldb::addr_t unresolved_pc, Function &caller, Target &target); - /// An invalid address if this is a tail call. Otherwise, the return PC for - /// the call. Note that this is a file address which must be resolved. - lldb::addr_t return_pc; + /// Like \ref GetReturnPCAddress, but returns an unresolved file address. + lldb::addr_t GetUnresolvedReturnPCAddress() const { + return caller_address_type == AddrType::AfterCall && !is_tail_call + ? caller_address + : LLDB_INVALID_ADDRESS; + } - /// The address of the call instruction. Usually an invalid address, unless - /// this is a tail call. - lldb::addr_t call_inst_pc; +private: + lldb::addr_t caller_address; + AddrType caller_address_type; + bool is_tail_call; CallSiteParameterArray parameters; }; @@ -321,9 +337,11 @@ public: /// Construct a call edge using a symbol name to identify the callee, and a /// return PC within the calling function to identify a specific call site. - DirectCallEdge(const char *symbol_name, lldb::addr_t return_pc, - lldb::addr_t call_inst_pc, CallSiteParameterArray &¶meters) - : CallEdge(return_pc, call_inst_pc, std::move(parameters)) { + DirectCallEdge(const char *symbol_name, AddrType caller_address_type, + lldb::addr_t caller_address, bool is_tail_call, + CallSiteParameterArray &¶meters) + : CallEdge(caller_address_type, caller_address, is_tail_call, + std::move(parameters)) { lazy_callee.symbol_name = symbol_name; } @@ -352,10 +370,11 @@ public: /// Construct a call edge using a DWARFExpression to identify the callee, and /// a return PC within the calling function to identify a specific call site. - IndirectCallEdge(DWARFExpression call_target, lldb::addr_t return_pc, - lldb::addr_t call_inst_pc, + IndirectCallEdge(DWARFExpression call_target, AddrType caller_address_type, + lldb::addr_t caller_address, bool is_tail_call, CallSiteParameterArray &¶meters) - : CallEdge(return_pc, call_inst_pc, std::move(parameters)), + : CallEdge(caller_address_type, caller_address, is_tail_call, + std::move(parameters)), call_target(std::move(call_target)) {} Function *GetCallee(ModuleList &images, ExecutionContext &exe_ctx) override; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -3733,26 +3733,30 @@ child.GetCU()); } } - if (child.Tag() == DW_TAG_GNU_call_site) { - // In DWARF v4 DW_AT_low_pc is always the address of the instruction after - // the call. For tail calls, approximate the address of the call - // instruction by subtracting 1. - if (tail_call) - call_inst_pc = low_pc - 1; - else - return_pc = low_pc; - } - if (!call_origin && !call_target) { LLDB_LOG(log, "CollectCallEdges: call site without any call target"); continue; } + addr_t caller_address; + CallEdge::AddrType caller_address_type; + if (return_pc != LLDB_INVALID_ADDRESS) { + caller_address = return_pc; + caller_address_type = CallEdge::AddrType::AfterCall; + } else if (low_pc != LLDB_INVALID_ADDRESS) { + caller_address = low_pc; + caller_address_type = CallEdge::AddrType::AfterCall; + } else if (call_inst_pc != LLDB_INVALID_ADDRESS) { + caller_address = call_inst_pc; + caller_address_type = CallEdge::AddrType::Call; + } else { + LLDB_LOG(log, "CollectCallEdges: No caller address"); + continue; + } // Adjust any PC forms. It needs to be fixed up if the main executable // contains a debug map (i.e. pointers to object files), because we need a // file address relative to the executable's text section. - return_pc = FixupAddress(return_pc); - call_inst_pc = FixupAddress(call_inst_pc); + caller_address = FixupAddress(caller_address); // Extract call site parameters. CallSiteParameterArray parameters = @@ -3764,9 +3768,9 @@ "CollectCallEdges: Found call origin: {0} (retn-PC: {1:x}) " "(call-PC: {2:x})", call_origin->GetPubname(), return_pc, call_inst_pc); - edge = std::make_unique(call_origin->GetMangledName(), - return_pc, call_inst_pc, - std::move(parameters)); + edge = std::make_unique( + call_origin->GetMangledName(), caller_address_type, caller_address, + tail_call, std::move(parameters)); } else { if (log) { StreamString call_target_desc; @@ -3776,7 +3780,8 @@ call_target_desc.GetString()); } edge = std::make_unique( - *call_target, return_pc, call_inst_pc, std::move(parameters)); + *call_target, caller_address_type, caller_address, tail_call, + std::move(parameters)); } if (log && parameters.size()) { diff --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp --- a/lldb/source/Symbol/Function.cpp +++ b/lldb/source/Symbol/Function.cpp @@ -145,11 +145,7 @@ lldb::addr_t CallEdge::GetReturnPCAddress(Function &caller, Target &target) const { - return GetLoadAddress(return_pc, caller, target); -} - -lldb::addr_t CallEdge::GetCallInstPC(Function &caller, Target &target) const { - return GetLoadAddress(call_inst_pc, caller, target); + return GetLoadAddress(GetUnresolvedReturnPCAddress(), caller, target); } void DirectCallEdge::ParseSymbolFileAndResolve(ModuleList &images) { @@ -313,36 +309,34 @@ m_call_edges = sym_file->ParseCallEdgesInFunction(GetID()); // Sort the call edges to speed up return_pc lookups. - llvm::sort(m_call_edges.begin(), m_call_edges.end(), - [](const std::unique_ptr &LHS, - const std::unique_ptr &RHS) { - return LHS->GetUnresolvedReturnPCAddress() < - RHS->GetUnresolvedReturnPCAddress(); - }); + llvm::sort(m_call_edges, [](const std::unique_ptr &LHS, + const std::unique_ptr &RHS) { + return LHS->GetSortKey() < RHS->GetSortKey(); + }); return m_call_edges; } llvm::ArrayRef> Function::GetTailCallingEdges() { - // Call edges are sorted by return PC, and tail calling edges have invalid - // return PCs. Find them at the end of the list. - return GetCallEdges().drop_until([](const std::unique_ptr &edge) { - return edge->GetUnresolvedReturnPCAddress() == LLDB_INVALID_ADDRESS; - }); + // Tail calling edges are sorted at the end of the list. Find them by dropping + // all non-tail-calls. + return GetCallEdges().drop_until( + [](const std::unique_ptr &edge) { return edge->IsTailCall(); }); } CallEdge *Function::GetCallEdgeForReturnAddress(addr_t return_pc, Target &target) { auto edges = GetCallEdges(); auto edge_it = - std::lower_bound(edges.begin(), edges.end(), return_pc, - [&](const std::unique_ptr &edge, addr_t pc) { - return edge->GetReturnPCAddress(*this, target) < pc; - }); + llvm::partition_point(edges, [&](const std::unique_ptr &edge) { + return std::make_pair(edge->IsTailCall(), + edge->GetReturnPCAddress(*this, target)) < + std::make_pair(false, return_pc); + }); if (edge_it == edges.end() || edge_it->get()->GetReturnPCAddress(*this, target) != return_pc) return nullptr; - return &const_cast(*edge_it->get()); + return edge_it->get(); } Block &Function::GetBlock(bool can_create) { diff --git a/lldb/source/Target/StackFrameList.cpp b/lldb/source/Target/StackFrameList.cpp --- a/lldb/source/Target/StackFrameList.cpp +++ b/lldb/source/Target/StackFrameList.cpp @@ -239,7 +239,12 @@ /// A sequence of calls that comprise some portion of a backtrace. Each frame /// is represented as a pair of a callee (Function *) and an address within the /// callee. -using CallSequence = std::vector>; +struct CallDescriptor { + Function *func; + CallEdge::AddrType address_type; + addr_t address; +}; +using CallSequence = std::vector; /// Find the unique path through the call graph from \p begin (with return PC /// \p return_pc) to \p end. On success this path is stored into \p path, and @@ -319,14 +324,14 @@ } // Search the calls made from this callee. - active_path.emplace_back(&callee, LLDB_INVALID_ADDRESS); + active_path.push_back(CallDescriptor{&callee}); for (const auto &edge : callee.GetTailCallingEdges()) { Function *next_callee = edge->GetCallee(images, context); if (!next_callee) continue; - addr_t tail_call_pc = edge->GetCallInstPC(callee, target); - active_path.back().second = tail_call_pc; + std::tie(active_path.back().address_type, active_path.back().address) = + edge->GetCallerAddress(callee, target); dfs(*edge, *next_callee); if (ambiguous) @@ -400,16 +405,16 @@ // Push synthetic tail call frames. for (auto calleeInfo : llvm::reverse(path)) { - Function *callee = calleeInfo.first; + Function *callee = calleeInfo.func; uint32_t frame_idx = m_frames.size(); uint32_t concrete_frame_idx = next_frame.GetConcreteFrameIndex(); addr_t cfa = LLDB_INVALID_ADDRESS; bool cfa_is_valid = false; - addr_t pc = calleeInfo.second; - // We do not want to subtract 1 from this PC, as it's the actual address - // of the tail-calling branch instruction. This address is provided by the - // compiler via DW_AT_call_pc. - constexpr bool behaves_like_zeroth_frame = true; + addr_t pc = calleeInfo.address; + // If the callee address refers to the call instruction, we do not want to + // subtract 1 from this value. + const bool behaves_like_zeroth_frame = + calleeInfo.address_type == CallEdge::AddrType::Call; SymbolContext sc; callee->CalculateSymbolContext(&sc); auto synth_frame = std::make_shared( diff --git a/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/main.cpp b/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/main.cpp --- a/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/main.cpp +++ b/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/main.cpp @@ -3,12 +3,22 @@ void __attribute__((noinline)) sink() { x++; //% self.filecheck("bt", "main.cpp", "-implicit-check-not=artificial") // CHECK: frame #0: 0x{{[0-9a-f]+}} a.out`sink() at main.cpp:[[@LINE-1]]:4 - // CHECK-NEXT: frame #1: 0x{{[0-9a-f]+}} a.out`func3() at main.cpp:16:3 + // CHECK-NEXT: frame #1: 0x{{[0-9a-f]+}} a.out`func3() at main.cpp:26:3 // CHECK-SAME: [artificial] // CHECK-NEXT: frame #2: 0x{{[0-9a-f]+}} a.out`func2() - // CHECK-NEXT: frame #3: 0x{{[0-9a-f]+}} a.out`func1() at main.cpp:25:3 + // CHECK-NEXT: frame #3: 0x{{[0-9a-f]+}} a.out`func1() at main.cpp:35:3 // CHECK-SAME: [artificial] // CHECK-NEXT: frame #4: 0x{{[0-9a-f]+}} a.out`main + // In the GNU style, the artificial frames will point after the tail call + // instruction. In v5 they should point to the instruction itself. + //% frame1 = self.thread().GetFrameAtIndex(1) + //% func3 = frame1.GetFunction() + //% func3_insns = func3.GetInstructions(self.target()) + //% self.trace("func3:\n%s"%func3_insns) + //% last_insn = func3_insns.GetInstructionAtIndex(func3_insns.GetSize()-1) + //% addr = last_insn.GetAddress() + //% if "GNU" in self.name: addr.OffsetAddress(last_insn.GetByteSize()) + //% self.assertEquals(frame1.GetPCAddress(), addr) } void __attribute__((noinline)) func3() {