Index: llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h =================================================================== --- /dev/null +++ llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h @@ -0,0 +1,115 @@ +#ifndef LLVM_CODEGEN_ASSIGNMENTTRACKINGANALYSIS_H +#define LLVM_CODEGEN_ASSIGNMENTTRACKINGANALYSIS_H + +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Pass.h" + +namespace llvm { +class Function; +class Instruction; +class Value; +class raw_ostream; +} // namespace llvm +class FunctionVarLocsBuilder; + +namespace llvm { +/// Type wrapper for integer ID for Variables. 0 is reserved. +enum class VariableID : unsigned { Reserved = 0 }; +/// Variable location definition used by FunctionVarLocs. +struct VarLocInfo { + llvm::VariableID VariableID; + DIExpression *Expr = nullptr; + DebugLoc DL; + Value *V = nullptr; // TODO: Needs to be value_s_ for variadic expressions. +}; + +/// Data structure describing the variable locations in a function. Used as the +/// result of the AssignmentTrackingAnalysis pass. Essentially read-only +/// outside of AssignmentTrackingAnalysis where it is built. +class FunctionVarLocs { + /// Maps VarLocInfo.VariableID to a DebugVariable for VarLocRecords. + SmallVector Variables; + /// List of variable location changes grouped by the instruction the + /// change occurs before (see VarLocsBeforeInst). The elements from + /// zero to SingleVarLocEnd represent variables with a single location. + SmallVector VarLocRecords; + /// End of range of VarLocRecords that represent variables with a single + /// location that is valid for the entire scope. Range starts at 0. + unsigned SingleVarLocEnd = 0; + /// Maps an instruction to a range of VarLocs that start just before it. + DenseMap> + VarLocsBeforeInst; + +public: + /// Return the DILocalVariable for the location definition represented by \p + /// ID. + DILocalVariable *getDILocalVariable(const VarLocInfo *Loc) const { + VariableID VarID = Loc->VariableID; + return getDILocalVariable(VarID); + } + /// Return the DILocalVariable of the variable represented by \p ID. + DILocalVariable *getDILocalVariable(VariableID ID) const { + return const_cast(getVariable(ID).getVariable()); + } + /// Return the DebugVariable represented by \p ID. + const DebugVariable &getVariable(VariableID ID) const { + return Variables[static_cast(ID)]; + } + + ///@name iterators + ///@{ + /// First single-location variable location definition. + const VarLocInfo *single_locs_begin() const { return VarLocRecords.begin(); } + /// One past the last single-location variable location definition. + const VarLocInfo *single_locs_end() const { + return VarLocRecords.begin() + SingleVarLocEnd; + } + /// First variable location definition that comes before \p Before. + const VarLocInfo *locs_begin(const Instruction *Before) const { + auto Span = VarLocsBeforeInst.lookup(Before); + auto It = VarLocRecords.begin(); + std::advance(It, Span.first); + return It; + } + /// One past the last variable location definition that comes before \p + /// Before. + const VarLocInfo *locs_end(const Instruction *Before) const { + auto Span = VarLocsBeforeInst.lookup(Before); + auto It = VarLocRecords.begin(); + std::advance(It, Span.second); + return It; + } + ///@} + + void print(raw_ostream &OS, const Function &Fn) const; + + ///@{ + /// Non-const methods used by AssignmentTrackingAnalysis (which invalidate + /// analysis results if called incorrectly). + void init(FunctionVarLocsBuilder &Builder); + void clear(); + ///@} +}; + +class AssignmentTrackingAnalysis : public FunctionPass { + std::unique_ptr Results; + +public: + static char ID; + + AssignmentTrackingAnalysis(); + + bool runOnFunction(Function &F) override; + + static bool isRequired() { return true; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + const FunctionVarLocs *getResults() { return Results.get(); } +}; + +} // end namespace llvm +#endif // LLVM_CODEGEN_ASSIGNMENTTRACKINGANALYSIS_H Index: llvm/include/llvm/InitializePasses.h =================================================================== --- llvm/include/llvm/InitializePasses.h +++ llvm/include/llvm/InitializePasses.h @@ -53,6 +53,7 @@ void initializeADCELegacyPassPass(PassRegistry&); void initializeAddDiscriminatorsLegacyPassPass(PassRegistry&); void initializeAddFSDiscriminatorsPass(PassRegistry &); +void initializeAssignmentTrackingAnalysisPass(PassRegistry &); void initializeAlignmentFromAssumptionsPass(PassRegistry&); void initializeAlwaysInlinerLegacyPassPass(PassRegistry&); void initializeAssumeSimplifyPassLegacyPassPass(PassRegistry &); Index: llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp =================================================================== --- /dev/null +++ llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp @@ -0,0 +1,1485 @@ +#include "llvm/CodeGen/AssignmentTrackingAnalysis.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/IntervalMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/UniqueVector.h" +#include "llvm/Analysis/Interval.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/PrintPasses.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include +#include +#include +#include + +using namespace llvm; +#define DEBUG_TYPE "debug-ata" + +static cl::opt + MaxNumBlocks("debug-ata-max-blocks", cl::init(10000), + cl::desc("Maximum num basic blocks before debug info dropped"), + cl::Hidden); +/// Print the results of the analysis. Respects -filter-print-funcs. +static cl::opt PrintResults("print-debug-ata", cl::init(false), + cl::Hidden); + +// Implicit conversions are disabled for enum class types, so unfortunately we +// need to create a DenseMapInfo wrapper around the specified underlying type. +template <> struct llvm::DenseMapInfo { + using Wrapped = DenseMapInfo; + static inline VariableID getEmptyKey() { + return static_cast(Wrapped::getEmptyKey()); + } + static inline VariableID getTombstoneKey() { + return static_cast(Wrapped::getTombstoneKey()); + } + static unsigned getHashValue(const VariableID &Val) { + return Wrapped::getHashValue(static_cast(Val)); + } + static bool isEqual(const VariableID &LHS, const VariableID &RHS) { + return LHS == RHS; + } +}; + +/// Helper class to build FunctionVarLocs, since that class isn't easy to +/// modify. TODO: There's not a great deal of value in the split, it could be +/// worth merging the two classes. +class FunctionVarLocsBuilder { + friend FunctionVarLocs; + UniqueVector Variables; + // Use an unordered_map so we don't invalidate iterators after + // insert/modifications. + std::unordered_map> + VarLocsBeforeInst; + + SmallVector SingleLocVars; + +public: + /// Find or insert \p V and return the ID. + VariableID insertVariable(DebugVariable V) { + return static_cast(Variables.insert(V)); + } + + /// Get a variable from its \p ID. + const DebugVariable &getVariable(VariableID ID) const { + return Variables[static_cast(ID)]; + } + + /// Return ptr to wedge of defs or nullptr if no defs come just before /p + /// Before. + const SmallVectorImpl *getWedge(const Instruction *Before) const { + auto R = VarLocsBeforeInst.find(Before); + if (R == VarLocsBeforeInst.end()) + return nullptr; + return &R->second; + } + + /// Replace the defs that come just before /p Before with /p Wedge. + void setWedge(const Instruction *Before, SmallVector &&Wedge) { + VarLocsBeforeInst[Before] = std::move(Wedge); + } + + /// Add a def for a variable that is valid for its lifetime. + void addSingleLocVar(DebugVariable Var, DIExpression *Expr, DebugLoc DL, + Value *V) { + VarLocInfo VarLoc; + VarLoc.VariableID = insertVariable(Var); + VarLoc.Expr = Expr; + VarLoc.DL = DL; + VarLoc.V = V; + SingleLocVars.emplace_back(VarLoc); + } + + /// Add a def to the wedge of defs just before /p Before. + void addVarLoc(Instruction *Before, DebugVariable Var, DIExpression *Expr, + DebugLoc DL, Value *V) { + VarLocInfo VarLoc; + VarLoc.VariableID = insertVariable(Var); + VarLoc.Expr = Expr; + VarLoc.DL = DL; + VarLoc.V = V; + VarLocsBeforeInst[Before].emplace_back(VarLoc); + } +}; + +void FunctionVarLocs::print(raw_ostream &OS, const Function &Fn) const { + // Print the variable table first. TODO: Sorting by variable could make the + // output more stable? + unsigned Counter = -1; + OS << "=== Variables ===\n"; + for (const DebugVariable &V : Variables) { + ++Counter; + // Skip first entry because it is a dummy entry. + if (Counter == 0) { + continue; + } + OS << "[" << Counter << "] " << V.getVariable()->getName(); + if (auto F = V.getFragment()) + OS << " bits [" << F->OffsetInBits << ", " + << F->OffsetInBits + F->SizeInBits << ")"; + if (const auto *IA = V.getInlinedAt()) + OS << " inlined-at " << *IA; + OS << "\n"; + } + + auto PrintLoc = [&OS](const VarLocInfo &Loc) { + OS << "DEF Var=[" << (unsigned)Loc.VariableID << "]" + << " Expr=" << *Loc.Expr << " V=" << *Loc.V << "\n"; + }; + + // Print the single location variables. + OS << "=== Single location vars ===\n"; + for (auto It = single_locs_begin(), End = single_locs_end(); It != End; + ++It) { + PrintLoc(*It); + } + + // Print the non-single-location defs in line with IR. + OS << "=== In-line variable defs ==="; + for (const BasicBlock &BB : Fn) { + OS << "\n" << BB.getName() << ":\n"; + for (const Instruction &I : BB) { + for (auto It = locs_begin(&I), End = locs_end(&I); It != End; ++It) { + PrintLoc(*It); + } + OS << I << "\n"; + } + } +} + +void FunctionVarLocs::init(FunctionVarLocsBuilder &Builder) { + // Add the single-location variables first. + for (auto VarLoc : Builder.SingleLocVars) + VarLocRecords.emplace_back(VarLoc); + // Mark the end of the section. + SingleVarLocEnd = VarLocRecords.size(); + + // Insert a contiguous block of VarLocInfos for each instruction, mapping it + // to the start and end position in the vector with VarLocsBeforeInst. + for (auto &P : Builder.VarLocsBeforeInst) { + unsigned BlockStart = VarLocRecords.size(); + for (const VarLocInfo &VarLoc : P.second) + VarLocRecords.emplace_back(VarLoc); + unsigned BlockEnd = VarLocRecords.size(); + // Record the start and end indices. + if (BlockEnd != BlockStart) + VarLocsBeforeInst[P.first] = {BlockStart, BlockEnd}; + } + + // Copy the Variables vector from the builder's UniqueVector. + assert(Variables.empty() && "Expect clear before init"); + // UniqueVectors IDs are one-based (which means the VarLocInfo VarID values + // are one-based) so reserve an extra and insert a dummy. + Variables.reserve(Builder.Variables.size() + 1); + Variables.push_back(DebugVariable(nullptr, None, nullptr)); + Variables.append(Builder.Variables.begin(), Builder.Variables.end()); +} + +void FunctionVarLocs::clear() { + Variables.clear(); + VarLocRecords.clear(); + VarLocsBeforeInst.clear(); + SingleVarLocEnd = 0; +} + +/// Walk backwards along constant GEPs and bitcasts to the base storage from \p +/// Start as far as possible. Prepend \Expression with the offset and append it +/// with a DW_OP_deref that haes been implicit until now. Returns the walked-to +/// value and modified expression. +static std::pair +walkToAllocaAndPrependOffsetDeref(const DataLayout &DL, Value *Start, + DIExpression *Expression) { + APInt OffsetInBytes(DL.getTypeSizeInBits(Start->getType()), false); + Value *End = + Start->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetInBytes); + SmallVector Ops; + if (OffsetInBytes.getBoolValue()) { + Ops = {dwarf::DW_OP_plus_uconst, OffsetInBytes.getZExtValue()}; + Expression = DIExpression::prependOpcodes( + Expression, Ops, /*StackValue=*/false, /*EntryValue=*/false); + } + Expression = DIExpression::append(Expression, {dwarf::DW_OP_deref}); + return {End, Expression}; +} + +/// A whole (unfragmented) source variable. +using DebugAggregate = std::pair; +static DebugAggregate getAggregate(const DbgVariableIntrinsic *DII) { + return DebugAggregate(DII->getVariable(), DII->getDebugLoc().getInlinedAt()); +} + +/// AssignmentTrackingLowering encapsulates a dataflow analysis over a function +/// that interprets assignment tracking debug info metadata and stores in IR to +/// create a map of variable locations. +class AssignmentTrackingLowering { +public: + /// The kind of location in use for a variable, where Mem is the stack home, + /// Val is an SSA value or const, and None means that there is not one single + /// kind (either because there are multiple or because there is none; it may + /// prove useful to split this into two values in the future). + /// + /// LocKind is a join-semilattice with the partial order: + /// None > Mem, Val + /// + /// i.e. + /// join(Mem, Mem) = Mem + /// join(Val, Val) = Val + /// join(Mem, Val) = None + /// join(None, Mem) = None + /// join(None, Val) = None + /// join(None, None) = None + /// + /// Note: the order is not `None > Val > Mem` because we're using DIAssignID + /// to name assignments and are not tracking the actual stored values. + /// Therefore currently there's no way to ensure that Mem values and Val + /// values are the same. This could be a future extension, though it's not + /// clear that many additional locations would be recovered that way in + /// practice as the likelihood of this sitation arising naturally seems + /// incredibly low. + enum class LocKind { Mem, Val, None }; + + /// An abstraction of the assignment of a value to a variable or memory + /// location. + /// + /// An Assignment is Known or NoneOrPhi. A Known Assignment means we have a + /// DIAssignID ptr that represents it. NoneOrPhi means that we don't (or + /// can't) know the ID of the last assignment that took place. + /// + /// The Status of the Assignment (Known or NoneOrPhi) is another + /// join-semilattice. The partial order is: + /// NoneOrPhi > Known {id_0, id_1, ...id_N} + /// + /// i.e. for all values x and y where x != y: + /// join(x, x) = x + /// join(x, y) = NoneOrPhi + struct Assignment { + enum S { Known, NoneOrPhi } Status; + /// ID of the assignment. nullptr if Status is not Known. + DIAssignID *ID; + /// The dbg.assign that marks this dbg-def. Mem-defs don't use this field. + /// May be nullptr. + DbgAssignIntrinsic *Source; + + bool isSameSourceAssignment(const Assignment &Other) const { + // Don't include Source in the equality check. Assignments are + // defined by their ID, not debug intrinsic(s). + return std::tie(Status, ID) == std::tie(Other.Status, Other.ID); + } + void dump(raw_ostream &OS) { + static const char *LUT[] = {"Known", "NoneOrPhi"}; + OS << LUT[Status] << "(id="; + if (ID) + OS << ID; + else + OS << "null"; + OS << ", s="; + if (Source) + OS << *Source; + else + OS << "null"; + OS << ")"; + } + + static Assignment make(DIAssignID *ID, DbgAssignIntrinsic *Source) { + return Assignment(Known, ID, Source); + } + static Assignment makeFromMemDef(DIAssignID *ID) { + return Assignment(Known, ID, nullptr); + } + static Assignment makeNoneOrPhi() { + return Assignment(NoneOrPhi, nullptr, nullptr); + } + // Again, need a Top value? + Assignment() + : Status(NoneOrPhi), ID(nullptr), Source(nullptr) { + } // Can we delete this? + Assignment(S Status, DIAssignID *ID, DbgAssignIntrinsic *Source) + : Status(Status), ID(ID), Source(Source) { + // If the Status is Known then we expect there to be an assignment ID. + assert(Status == NoneOrPhi || ID); + } + }; + + using AssignmentMap = DenseMap; + using LocMap = DenseMap; + using OverlapMap = DenseMap>; + +private: + /// Map a variable to the set of variables that it fully contains. + OverlapMap VarContains; + + // Machinery to defer inserting dbg.values. + using InsertMap = MapVector>; + InsertMap InsertBeforeMap; + /// Clear the location definitions currently cached for insertion after /p + /// After. + void resetInsertionPoint(Instruction *After); + void emitDbgValue(LocKind Kind, const DbgVariableIntrinsic *Source, + Instruction *After); + + static bool mapsAreEqual(const AssignmentMap &A, const AssignmentMap &B) { + if (A.size() != B.size()) + return false; + for (const auto &Pair : A) { + VariableID Var = Pair.first; + const Assignment &AV = Pair.second; + auto R = B.find(Var); + // Check if this entry exists in B, otherwise ret false. + if (R == B.end()) + return false; + // Check that the assignment value is the same. + if (!AV.isSameSourceAssignment(R->second)) + return false; + } + return true; + } + + /// Represents the stack and debug assignments in a block. Used to describe + /// the live-in and live-out values for blocks, as well as the "current" + /// value as we process each instruction in a block. + struct BlockInfo { + /// Dominating assignment to memory for each variable. + AssignmentMap StackHomeValue; + /// Dominating assignemnt to each variable. + AssignmentMap DebugValue; + /// Location kind for each variable. LiveLoc indicates whether the + /// dominating assignment in StackHomeValue (LocKind::Mem), DebugValue + /// (LocKind::Val), or neither (LocKind::None) is valid, in that order of + /// preference. This cannot be derived by inspecting DebugValue and + /// StackHomeValue due to the fact that there's no distinction in + /// Assignment (the class) between whether an assignment is unknown or a + /// merge of multiple assignments (both are Status::NoneOrPhi). In other + /// words, the memory location may well be valid while both DebugValue and + /// StackHomeValue contain Assignments that have a Status of NoneOrPhi. + LocMap LiveLoc; + + /// Compare every element in each map to determine structural equality + /// (slow). + bool operator==(const BlockInfo &Other) const { + return LiveLoc == Other.LiveLoc && + mapsAreEqual(StackHomeValue, Other.StackHomeValue) && + mapsAreEqual(DebugValue, Other.DebugValue); + } + bool operator!=(const BlockInfo &Other) const { return !(*this == Other); } + bool isValid() { + return LiveLoc.size() == DebugValue.size() && + LiveLoc.size() == StackHomeValue.size(); + } + }; + + Function &Fn; + const DataLayout &Layout; + const DenseSet *VarsWithStackSlot; + FunctionVarLocsBuilder *FnVarLocs; + DenseMap LiveIn; + DenseMap LiveOut; + + /// Helper for process methods to track variables touched each frame. + DenseSet VarsTouchedThisFrame; + + /// The set of variables that sometimes are not located in their stack home. + DenseSet NotAlwaysStackHomed; + + VariableID getVariableID(const DebugVariable &Var) { + return static_cast(FnVarLocs->insertVariable(Var)); + } + + /// Join the LiveOut values of preds that are contained in \p Visited into + /// LiveIn[BB]. Return True if LiveIn[BB] has changed as a result. LiveIn[BB] + /// values monotonically increase. See the @link joinMethods join methods + /// @endlink documentation for more info. + bool join(const BasicBlock &BB, const SmallPtrSet &Visited); + ///@name joinMethods + /// Functions that implement `join` (the least upper bound) for the + /// join-semilattice types used in the dataflow. There is an explicit bottom + /// value (⊥) for some types and and explicit top value (⊤) for all types. + /// By definition: + /// + /// Join(A, B) >= A && Join(A, B) >= B + /// Join(A, ⊥) = A + /// Join(A, ⊤) = ⊤ + /// + /// These invariants are important for monotonicity. + /// + /// For the map-type functions, all unmapped keys in an empty map are + /// associated with a bottom value (⊥). This represents their values being + /// unknown. Unmapped keys in non-empty maps (joining two maps with a key + /// only present in one) represents either a variable going out of scope or + /// dropped debug info. It is assumed the key is associated with a top value + /// (⊤) in this case (unknown location / assignment). + ///@{ + static LocKind joinKind(LocKind A, LocKind B); + static LocMap joinLocMap(const LocMap &A, const LocMap &B); + static Assignment joinAssignment(const Assignment &A, const Assignment &B); + static AssignmentMap joinAssignmentMap(const AssignmentMap &A, + const AssignmentMap &B); + static BlockInfo joinBlockInfo(const BlockInfo &A, const BlockInfo &B); + ///@} + + /// Process the instructions in \p BB updating \p LiveSet along the way. \p + /// LiveSet must be initialized with the current live-in locations before + /// calling this. + void process(BasicBlock &BB, BlockInfo *LiveSet); + ///@name processMethods + /// Methods to process instructions in order to update the LiveSet (current + /// location information). + ///@{ + void processNonDbgInstruction(Instruction &I, BlockInfo *LiveSet); + void processDbgInstruction(Instruction &I, BlockInfo *LiveSet); + /// Update \p LiveSet after encountering an instruction with a DIAssignID + /// attachment, \p I. + void processTaggedInstruction(Instruction &I, BlockInfo *LiveSet); + /// Update \p LiveSet after encountering an instruciton without a DIAssignID + /// attachment, \p I. + void processUntaggedInstruction(Instruction &I, BlockInfo *LiveSet); + void processDbgAssign(DbgAssignIntrinsic &DAI, BlockInfo *LiveSet); + void processDbgValue(DbgValueInst &DVI, BlockInfo *LiveSet); + /// Add an assignment to memory for the variable /p Var. + void addMemDef(BlockInfo *LiveSet, VariableID Var, const Assignment &AV); + /// Add an assignment to the variable /p Var. + void addDbgDef(BlockInfo *LiveSet, VariableID Var, const Assignment &AV); + ///@} + + /// Set the LocKind for \p Var. + void setLocKind(BlockInfo *LiveSet, VariableID Var, LocKind K); + /// Get the live LocKind for a \p Var. Requires addMemDef or addDbgDef to + /// have been called for \p Var first. + LocKind getLocKind(BlockInfo *LiveSet, VariableID Var); + + // Emit info for variables that are fully promoted. + bool emitPromotedVarLocs(FunctionVarLocsBuilder *FnVarLocs); + +public: + AssignmentTrackingLowering(Function &Fn, const DataLayout &Layout, + const DenseSet *VarsWithStackSlot) + : Fn(Fn), Layout(Layout), VarsWithStackSlot(VarsWithStackSlot) {} + /// Run the analysis, adding variable location info to \p FnVarLocs. Returns + /// true if any variable locations have been added to FnVarLocs. + bool run(FunctionVarLocsBuilder *FnVarLocs); +}; + +void AssignmentTrackingLowering::setLocKind(BlockInfo *LiveSet, VariableID Var, + LocKind K) { + auto SetKind = [this](BlockInfo *LiveSet, VariableID Var, LocKind K) { + VarsTouchedThisFrame.insert(Var); + LiveSet->LiveLoc[Var] = K; + }; + SetKind(LiveSet, Var, K); + + // Update the LocKind for all fragments contained within Var. + for (VariableID Frag : VarContains[Var]) + SetKind(LiveSet, Frag, K); +} + +AssignmentTrackingLowering::LocKind +AssignmentTrackingLowering::getLocKind(BlockInfo *LiveSet, VariableID Var) { + auto Pair = LiveSet->LiveLoc.find(Var); + assert(Pair != LiveSet->LiveLoc.end()); + return Pair->second; +} + +void AssignmentTrackingLowering::addMemDef(BlockInfo *LiveSet, VariableID Var, + const Assignment &AV) { + auto AddDef = [](BlockInfo *LiveSet, VariableID Var, Assignment AV) { + LiveSet->StackHomeValue[Var] = AV; + // Add default (Var -> ⊤) to DebugValue if Var isn't in DebugValue yet. + LiveSet->DebugValue.insert({Var, Assignment::makeNoneOrPhi()}); + // Add default (Var -> ⊤) to LiveLocs if Var isn't in LiveLocs yet. Callers + // of addMemDef will call setLocKind to override. + LiveSet->LiveLoc.insert({Var, LocKind::None}); + }; + AddDef(LiveSet, Var, AV); + + // Use this assigment for all fragments contained within Var, but do not + // provide a Source because we cannot convert Var's value to a value for the + // fragment. + Assignment FragAV = AV; + FragAV.Source = nullptr; + for (VariableID Frag : VarContains[Var]) + AddDef(LiveSet, Frag, FragAV); +} + +void AssignmentTrackingLowering::addDbgDef(BlockInfo *LiveSet, VariableID Var, + const Assignment &AV) { + auto AddDef = [](BlockInfo *LiveSet, VariableID Var, Assignment AV) { + LiveSet->DebugValue[Var] = AV; + // Add default (Var -> ⊤) to StackHome if Var isn't in StackHome yet. + LiveSet->StackHomeValue.insert({Var, Assignment::makeNoneOrPhi()}); + // Add default (Var -> ⊤) to LiveLocs if Var isn't in LiveLocs yet. Callers + // of addDbgDef will call setLocKind to override. + LiveSet->LiveLoc.insert({Var, LocKind::None}); + }; + AddDef(LiveSet, Var, AV); + + // Use this assigment for all fragments contained within Var, but do not + // provide a Source because we cannot convert Var's value to a value for the + // fragment. + Assignment FragAV = AV; + FragAV.Source = nullptr; + for (VariableID Frag : VarContains[Var]) + AddDef(LiveSet, Frag, FragAV); +} + +static DIAssignID *getIDFromInst(const Instruction &I) { + return cast(I.getMetadata(LLVMContext::MD_DIAssignID)); +} + +static DIAssignID *getIDFromMarker(const DbgAssignIntrinsic &DAI) { + return cast(DAI.getAssignID()); +} + +/// Return true if \p Var has an assignment in \p M matching \p AV. +static bool hasVarWithAssignment(VariableID Var, + AssignmentTrackingLowering::Assignment AV, + AssignmentTrackingLowering::AssignmentMap &M) { + auto R = M.find(Var); + if (R == M.end()) + return false; + return AV.isSameSourceAssignment(R->second); +} + +const char *locStr(AssignmentTrackingLowering::LocKind Loc) { + using LocKind = AssignmentTrackingLowering::LocKind; + switch (Loc) { + case LocKind::Val: + return "Val"; + case LocKind::Mem: + return "Mem"; + case LocKind::None: + return "None"; + }; + llvm_unreachable("unknown LocKind"); +} + +void AssignmentTrackingLowering::emitDbgValue( + AssignmentTrackingLowering::LocKind Kind, + const DbgVariableIntrinsic *Source, Instruction *After) { + + DILocation *DL = Source->getDebugLoc(); + auto Emit = [this, Source, After, DL](Value *Val, DIExpression *Expr) { + assert(Expr); + // It's possible that getVariableLocationOp(0) is null. Occurs in + // llvm/test/DebugInfo/Generic/2010-05-03-OriginDIE.ll Treat it as undef. + if (!Val) + Val = UndefValue::get(Type::getInt1Ty(Source->getContext())); + + // Find a suitable insert point. + Instruction *InsertBefore = After->getNextNode(); + assert(InsertBefore && "Shouldn't be inserting after a terminator"); + + VariableID Var = getVariableID(DebugVariable(Source)); + VarLocInfo VarLoc; + VarLoc.VariableID = static_cast(Var); + VarLoc.Expr = Expr; + VarLoc.V = Val; + VarLoc.DL = DL; + // Insert it into the map for later. + InsertBeforeMap[InsertBefore].push_back(VarLoc); + }; + + // NOTE: This block can mutate Kind. + if (Kind == LocKind::Mem) { + const auto *DAI = cast(Source); + // Check the address hasn't been dropped (e.g. the debug uses may not have + // been replaced before deleting a Value). + if (Value *Val = DAI->getAddress()) { + DIExpression *Expr = DAI->getAddressExpression(); + assert(!Expr->getFragmentInfo() && + "fragment info should be stored in value-expression only"); + // Copy the fragment info over from the value-expression to the new + // DIExpression. + if (auto OptFragInfo = Source->getExpression()->getFragmentInfo()) { + auto FragInfo = OptFragInfo.value(); + Expr = *DIExpression::createFragmentExpression( + Expr, FragInfo.OffsetInBits, FragInfo.SizeInBits); + } + // The address-expression has an implicit deref, add it now. + std::tie(Val, Expr) = + walkToAllocaAndPrependOffsetDeref(Layout, Val, Expr); + Emit(Val, Expr); + return; + } else { + // The address isn't valid so treat this as a non-memory def. + Kind = LocKind::Val; + } + } + + if (Kind == LocKind::Val) { + /// Get the value component, converting to Undef if it is variadic. + Value *Val = + Source->hasArgList() + ? UndefValue::get(Source->getVariableLocationOp(0)->getType()) + : Source->getVariableLocationOp(0); + Emit(Val, Source->getExpression()); + return; + } + + if (Kind == LocKind::None) { + Value *Val = UndefValue::get(Source->getVariableLocationOp(0)->getType()); + Emit(Val, Source->getExpression()); + return; + } +} + +void AssignmentTrackingLowering::processNonDbgInstruction( + Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) { + if (I.hasMetadata(LLVMContext::MD_DIAssignID)) + processTaggedInstruction(I, LiveSet); + else + processUntaggedInstruction(I, LiveSet); +} + +void AssignmentTrackingLowering::processUntaggedInstruction( + Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) { + // Interpret stack stores that are not tagged as an assignment in memory for + // the variables associated with that address. These stores may not be tagged + // because a) the store cannot be represented using dbg.assigns (non-const + // length or offset) or b) the tag was accidentally dropped during + // optimisations. For these stores we fall back to assuming that the stack + // home is a valid location for the variables. The benefit is that this + // prevents us missing an assignment and therefore incorrectly maintaining + // earlier location definitions, and in many cases it should be a reasonable + // assumption. However, this will occasionally lead slight inaccuracies. The + // value of a hoisted untagged store will be visible "early", for example. + + assert(!I.hasMetadata(LLVMContext::MD_DIAssignID)); + + auto GetAssignmentInfo = [&I, this]() -> Optional { + // Don't bother checking if this is an AllocaInst. We know this + // instruction has no tag which means there are no variables associated + // with it. + if (auto *SI = dyn_cast(&I)) + return at::getAssignmentInfo(Layout, SI); + if (auto *MI = dyn_cast(&I)) + return at::getAssignmentInfo(Layout, MI); + // Alloca or non-store-like inst. + return None; + }; + auto OptInfo = GetAssignmentInfo(); + if (!OptInfo) + return; + at::AssignmentInfo Info = *OptInfo; + if (!Info.Base) + return; // Couldn't walk back to alloca. + auto Linked = at::getAssignmentMarkers(Info.Base); + if (Linked.empty()) + return; // No variable information around. + + LLVM_DEBUG(dbgs() << "processUntaggedInstruction on UNTAGGED INST " << I + << "\n"); + + // Find markers linked to this alloca. + for (DbgAssignIntrinsic *DAI : Linked) { + if (DAI->getExpression()->getNumElements() != 0) + continue; // Don't bother with tricky situations. + + // Something has gone wrong if VarsWithStackSlot doesn't contain a variable + // that is linked to an alloca. + assert(VarsWithStackSlot->count(getAggregate(DAI))); + + DIExpression::FragmentInfo F; + F.OffsetInBits = Info.OffsetInBits; + F.SizeInBits = Info.SizeInBits; + VariableID Var = getVariableID(DebugVariable( + DAI->getVariable(), F, DAI->getDebugLoc().getInlinedAt())); + + // This instruction is treated as both a debug and memory assignment, + // meaning the memory location should be used. We don't have an assignment + // ID though so use Assignment::makeNoneOrPhi() to create an imaginary one. + addMemDef(LiveSet, Var, Assignment::makeNoneOrPhi()); + addDbgDef(LiveSet, Var, Assignment::makeNoneOrPhi()); + setLocKind(LiveSet, Var, LocKind::Mem); + LLVM_DEBUG(dbgs() << " setting Stack LocKind to: " << locStr(LocKind::Mem) + << "\n"); + // Build the dbg.value to insert. + // + // DIExpression: Add fragment and offset. + DIExpression *DIE = DIExpression::get(I.getContext(), None); + if (!Info.StoreToWholeAlloca) { + auto R = DIExpression::createFragmentExpression(DIE, Info.OffsetInBits, + Info.SizeInBits); + assert(R && "unexpected createFragmentExpression failure"); + DIE = R.value(); + } + SmallVector Ops; + if (Info.OffsetInBits) + Ops = {dwarf::DW_OP_plus_uconst, Info.OffsetInBits / 8}; + Ops.push_back(dwarf::DW_OP_deref); + DIE = DIExpression::prependOpcodes(DIE, Ops, /*StackValue=*/false, + /*EntryValue=*/false); + + // Find a suitable insert point. + Instruction *InsertBefore = I.getNextNode(); + assert(InsertBefore && "Shouldn't be inserting after a terminator"); + + VarLocInfo VarLoc; + VarLoc.VariableID = static_cast(Var); + VarLoc.Expr = DIE; + VarLoc.V = const_cast(Info.Base); + VarLoc.DL = DAI->getDebugLoc(); + // 3. Insert it into the map for later. + InsertBeforeMap[InsertBefore].push_back(VarLoc); + } +} + +void AssignmentTrackingLowering::processTaggedInstruction( + Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) { + auto Linked = at::getAssignmentMarkers(&I); + // No dbg.assign intrinsics linked. + // FIXME: All vars that have a stack slot this store modifies that don't have + // a dbg.assign linked to it should probably treat this like an untagged + // store. + if (Linked.empty()) + return; + + LLVM_DEBUG(dbgs() << "processTaggedInstruction on " << I << "\n"); + for (DbgAssignIntrinsic *DAI : Linked) { + VariableID Var = getVariableID(DebugVariable(DAI)); + // Something has gone wrong if VarsWithStackSlot doesn't contain a variable + // that is linked to a store. + assert(VarsWithStackSlot->count(getAggregate(DAI)) && + "expected DAI's variable to have stack slot"); + + Assignment AV = Assignment::makeFromMemDef(getIDFromInst(I)); + addMemDef(LiveSet, Var, AV); + + LLVM_DEBUG(dbgs() << " linked to " << *DAI << "\n"); + LLVM_DEBUG(dbgs() << " LiveLoc " << locStr(getLocKind(LiveSet, Var)) + << " -> "); + + // The last assignment to the stack is now AV. Check if the last debug + // assignment has a matching Assignment. + if (hasVarWithAssignment(Var, AV, LiveSet->DebugValue)) { + // The StackHomeValue and DebugValue for this variable match so we can + // emit a stack home location here. + LLVM_DEBUG(dbgs() << "Mem, Stack matches Debug program\n";); + LLVM_DEBUG(dbgs() << " Stack val: "; AV.dump(dbgs()); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << " Debug val: "; + LiveSet->DebugValue[Var].dump(dbgs()); dbgs() << "\n"); + setLocKind(LiveSet, Var, LocKind::Mem); + emitDbgValue(LocKind::Mem, DAI, &I); + continue; + } + + // The StackHomeValue and DebugValue for this variable do not match. I.e. + // The value currently stored in the stack is not what we'd expect to + // see, so we cannot use emit a stack home location here. Now we will + // look at the live LocKind for the variable and determine an appropriate + // dbg.value to emit. + LocKind PrevLoc = getLocKind(LiveSet, Var); + switch (PrevLoc) { + case LocKind::Val: { + // The value in memory in memory has changed but we're not currently + // using the memory location. Do nothing. + LLVM_DEBUG(dbgs() << "Val, (unchanged)\n";); + setLocKind(LiveSet, Var, LocKind::Val); + } break; + case LocKind::Mem: { + // There's been an assignment to memory that we were using as a + // location for this variable, and the Assignment doesn't match what + // we'd expect to see in memory. + if (LiveSet->DebugValue[Var].Status == Assignment::NoneOrPhi) { + // We need to terminate any previously open location now. + LLVM_DEBUG(dbgs() << "None, No Debug value available\n";); + setLocKind(LiveSet, Var, LocKind::None); + emitDbgValue(LocKind::None, DAI, &I); + } else { + // The previous DebugValue Value can be used here. + LLVM_DEBUG(dbgs() << "Val, Debug value is Known\n";); + setLocKind(LiveSet, Var, LocKind::Val); + Assignment PrevAV = LiveSet->DebugValue.lookup(Var); + if (PrevAV.Source) { + emitDbgValue(LocKind::Val, PrevAV.Source, &I); + } else { + // PrevAV.Source is nullptr so we must emit undef here. + emitDbgValue(LocKind::None, DAI, &I); + } + } + } break; + case LocKind::None: { + // There's been an assignment to memory and we currently are + // not tracking a location for the variable. Do not emit anything. + LLVM_DEBUG(dbgs() << "None, (unchanged)\n";); + setLocKind(LiveSet, Var, LocKind::None); + } break; + } + } +} + +void AssignmentTrackingLowering::processDbgAssign(DbgAssignIntrinsic &DAI, + BlockInfo *LiveSet) { + // Only bother tracking variables that are at some point stack homed. Other + // variables can be dealt with trivially later. + if (!VarsWithStackSlot->count(getAggregate(&DAI))) + return; + + VariableID Var = getVariableID(DebugVariable(&DAI)); + Assignment AV = Assignment::make(getIDFromMarker(DAI), &DAI); + addDbgDef(LiveSet, Var, AV); + + LLVM_DEBUG(dbgs() << "processDbgAssign on " << DAI << "\n";); + LLVM_DEBUG(dbgs() << " LiveLoc " << locStr(getLocKind(LiveSet, Var)) + << " -> "); + + // Check if the DebugValue and StackHomeValue both hold the same + // Assignment. + if (hasVarWithAssignment(Var, AV, LiveSet->StackHomeValue)) { + // They match. We can use the stack home because the debug intrinsics state + // that an assignment happened here, and we know that specific assignment + // was the last one to take place in memory for this variable. + if (isa(DAI.getAddress())) { + // Address may be undef to indicate that although the store does take + // place, this part of the original store has been elided. + LLVM_DEBUG( + dbgs() << "Val, Stack matches Debug program but address is undef\n";); + setLocKind(LiveSet, Var, LocKind::Val); + } else { + LLVM_DEBUG(dbgs() << "Mem, Stack matches Debug program\n";); + setLocKind(LiveSet, Var, LocKind::Mem); + }; + emitDbgValue(LocKind::Mem, &DAI, &DAI); + } else { + // The last assignment to the memory location isn't the one that we want to + // show to the user so emit a dbg.value(Value). Value may be undef. + LLVM_DEBUG(dbgs() << "Val, Stack contents is unknown\n";); + setLocKind(LiveSet, Var, LocKind::Val); + emitDbgValue(LocKind::Val, &DAI, &DAI); + } +} + +void AssignmentTrackingLowering::processDbgValue(DbgValueInst &DVI, + BlockInfo *LiveSet) { + // Only other tracking variables that are at some point stack homed. + // Other variables can be dealt with trivally later. + if (!VarsWithStackSlot->count(getAggregate(&DVI))) + return; + + VariableID Var = getVariableID(DebugVariable(&DVI)); + // We have no ID to create an Assignment with so we mark this assignment as + // NoneOrPhi. Note that the dbg.value still exists, we just cannot determine + // the assignment responsible for setting this value. + // This is fine; dbg.values are essentially interchangable with unlinked + // dbg.assigns, and some passes such as mem2reg and instcombine add them to + // PHIs for promoted variables. + Assignment AV = Assignment::makeNoneOrPhi(); + addDbgDef(LiveSet, Var, AV); + + LLVM_DEBUG(dbgs() << "processDbgValue on " << DVI << "\n";); + LLVM_DEBUG(dbgs() << " LiveLoc " << locStr(getLocKind(LiveSet, Var)) + << " -> Val, dbg.value override"); + + setLocKind(LiveSet, Var, LocKind::Val); + emitDbgValue(LocKind::Val, &DVI, &DVI); +} + +void AssignmentTrackingLowering::processDbgInstruction( + Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) { + assert(!isa(&I) && "unexpected dbg.addr"); + if (auto *DAI = dyn_cast(&I)) + processDbgAssign(*DAI, LiveSet); + else if (auto *DVI = dyn_cast(&I)) + processDbgValue(*DVI, LiveSet); +} + +void AssignmentTrackingLowering::resetInsertionPoint(Instruction *After) { + assert(!After->isTerminator() && "Can't insert after a terminator"); + return; + auto R = InsertBeforeMap.find(After->getNextNode()); + if (R == InsertBeforeMap.end()) + return; + R->second.clear(); +} + +void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) { + for (auto II = BB.begin(), EI = BB.end(); II != EI;) { + assert(VarsTouchedThisFrame.empty()); + // Process the instructions in "frames". A "frame" includes a single + // non-debug instruction followed any debug instructions before the + // next non-debug instruction. + if (!isa(&*II)) { + if (II->isTerminator()) + break; + resetInsertionPoint(&*II); + processNonDbgInstruction(*II, LiveSet); + assert(LiveSet->isValid()); + ++II; + } + while (II != EI) { + if (!isa(&*II)) + break; + resetInsertionPoint(&*II); + processDbgInstruction(*II, LiveSet); + assert(LiveSet->isValid()); + ++II; + } + + // We've processed everything in the "frame". Now determine which variables + // cannot be represented by a dbg.declare. + for (auto Var : VarsTouchedThisFrame) { + LocKind Loc = getLocKind(LiveSet, Var); + // If a variable's LocKind is anything other than LocKind::Mem then we + // must note that it cannot be represented with a dbg.declare. + // Note that this check is enough without having to check the result of + // joins() because for join to produce anything other than Mem after + // we've already seen a Mem we'd be joining None or Val with Mem. In that + // case, we've already hit this codepath when we set the LocKind to Val + // or None in that block. + if (Loc != LocKind::Mem) { + DebugVariable DbgVar = FnVarLocs->getVariable(Var); + DebugAggregate Aggr{DbgVar.getVariable(), DbgVar.getInlinedAt()}; + NotAlwaysStackHomed.insert(Aggr); + } + } + VarsTouchedThisFrame.clear(); + } +} + +AssignmentTrackingLowering::LocKind +AssignmentTrackingLowering::joinKind(LocKind A, LocKind B) { + // Partial order: + // None > Mem, Val + return A == B ? A : LocKind::None; +} + +AssignmentTrackingLowering::LocMap +AssignmentTrackingLowering::joinLocMap(const LocMap &A, const LocMap &B) { + // Join A and B. + // + // U = join(a, b) for a in A, b in B where Var(a) == Var(b) + // D = join(x, ⊤) for x where Var(x) is in A xor B + // Join = U ∪ D + // + // This is achieved by performing a join on elements from A and B with + // variables common to both A and B (join elements indexed by var intersect), + // then adding LocKind::None elements for vars in A xor B. The latter part is + // equivalent to performing join on elements with variables in A xor B with + // LocKind::None (⊤) since join(x, ⊤) = ⊤. + LocMap Join; + SmallVector SymmetricDifference; + // Insert the join of the elements with common vars into Join. Add the + // remaining elements to into SymmetricDifference. + for (const auto &[Var, Loc] : A) { + // If this Var doesn't exist in B then add it to the symmetric difference + // set. + auto R = B.find(Var); + if (R == B.end()) { + SymmetricDifference.push_back(Var); + continue; + } + // There is an entry for Var in both, join it. + Join[Var] = joinKind(Loc, R->second); + } + unsigned IntersectSize = Join.size(); + (void)IntersectSize; + + // Add the elements in B with variables that are not in A into + // SymmetricDifference. + for (const auto &Pair : B) { + VariableID Var = Pair.first; + if (A.count(Var) == 0) + SymmetricDifference.push_back(Var); + } + + // Add SymmetricDifference elements to Join and return the result. + for (const auto &Var : SymmetricDifference) + Join.insert({Var, LocKind::None}); + + assert(Join.size() == (IntersectSize + SymmetricDifference.size())); + assert(Join.size() >= A.size() && Join.size() >= B.size()); + return Join; +} + +AssignmentTrackingLowering::Assignment +AssignmentTrackingLowering::joinAssignment(const Assignment &A, + const Assignment &B) { + // Partial order: + // NoneOrPhi(null, null) > Known(v, ?s) + + // If either are NoneOrPhi the join is NoneOrPhi. + // If either value is different then the result is + // NoneOrPhi (joining two values is a Phi). + if (!A.isSameSourceAssignment(B)) + return Assignment::makeNoneOrPhi(); + if (A.Status == Assignment::NoneOrPhi) + return Assignment::makeNoneOrPhi(); + + // Source is used to lookup the value + expression in the debug program if + // the stack slot gets assigned a value earlier than expected. Because + // we're only tracking the one dbg.assign, we can't capture debug PHIs. + // It's unlikely that we're losing out on much coverage by avoiding that + // extra work. + // The Source may differ in this situation: + // Pred.1: + // dbg.assign i32 0, ..., !1, ... + // Pred.2: + // dbg.assign i32 1, ..., !1, ... + // Here the same assignment (!1) was performed in both preds in the source, + // but we can't use either one unless they are identical (e.g. .we don't + // want to arbitrarily pick between constant values). + auto JoinSource = [&]() -> DbgAssignIntrinsic * { + if (A.Source == B.Source) + return A.Source; + if (A.Source == nullptr || B.Source == nullptr) + return nullptr; + if (A.Source->isIdenticalTo(B.Source)) + return A.Source; + return nullptr; + }; + DbgAssignIntrinsic *Source = JoinSource(); + assert(A.Status == B.Status && A.Status == Assignment::Known); + assert(A.ID == B.ID); + return Assignment::make(A.ID, Source); +} + +AssignmentTrackingLowering::AssignmentMap +AssignmentTrackingLowering::joinAssignmentMap(const AssignmentMap &A, + const AssignmentMap &B) { + // Join A and B. + // + // U = join(a, b) for a in A, b in B where Var(a) == Var(b) + // D = join(x, ⊤) for x where Var(x) is in A xor B + // Join = U ∪ D + // + // This is achieved by performing a join on elements from A and B with + // variables common to both A and B (join elements indexed by var intersect), + // then adding LocKind::None elements for vars in A xor B. The latter part is + // equivalent to performing join on elements with variables in A xor B with + // Status::NoneOrPhi (⊤) since join(x, ⊤) = ⊤. + AssignmentMap Join; + SmallVector SymmetricDifference; + // Insert the join of the elements with common vars into Join. Add the + // remaining elements to into SymmetricDifference. + for (const auto &[Var, AV] : A) { + // If this Var doesn't exist in B then add it to the symmetric difference + // set. + auto R = B.find(Var); + if (R == B.end()) { + SymmetricDifference.push_back(Var); + continue; + } + // There is an entry for Var in both, join it. + Join[Var] = joinAssignment(AV, R->second); + } + unsigned IntersectSize = Join.size(); + (void)IntersectSize; + + // Add the elements in B with variables that are not in A into + // SymmetricDifference. + for (const auto &Pair : B) { + VariableID Var = Pair.first; + if (A.count(Var) == 0) + SymmetricDifference.push_back(Var); + } + + // Add SymmetricDifference elements to Join and return the result. + for (auto Var : SymmetricDifference) + Join.insert({Var, Assignment::makeNoneOrPhi()}); + + assert(Join.size() == (IntersectSize + SymmetricDifference.size())); + assert(Join.size() >= A.size() && Join.size() >= B.size()); + return Join; +} + +AssignmentTrackingLowering::BlockInfo +AssignmentTrackingLowering::joinBlockInfo(const BlockInfo &A, + const BlockInfo &B) { + BlockInfo Join; + Join.LiveLoc = joinLocMap(A.LiveLoc, B.LiveLoc); + Join.StackHomeValue = joinAssignmentMap(A.StackHomeValue, B.StackHomeValue); + Join.DebugValue = joinAssignmentMap(A.DebugValue, B.DebugValue); + assert(Join.isValid()); + return Join; +} + +bool AssignmentTrackingLowering::join( + const BasicBlock &BB, const SmallPtrSet &Visited) { + BlockInfo BBLiveIn; + bool FirstJoin = true; + // LiveIn locs for BB is the join of the already-processed preds' LiveOut + // locs. + for (auto I = pred_begin(&BB), E = pred_end(&BB); I != E; I++) { + // Ignore backedges if we have not visited the predecessor yet. As the + // predecessor hasn't yet had locations propagated into it, most locations + // will not yet be valid, so treat them as all being uninitialized and + // potentially valid. If a location guessed to be correct here is + // invalidated later, we will remove it when we revisit this block. This + // is essentially the same as initialising all LocKinds and Assignments to + // an implicit ⊥ value which is the identity value for the join operation. + const BasicBlock *Pred = *I; + if (!Visited.count(Pred)) + continue; + + auto PredLiveOut = LiveOut.find(Pred); + // Pred must have been processed already. See comment at start of this loop. + assert(PredLiveOut != LiveOut.end()); + + // Perform the join of BBLiveIn (current live-in info) and PrevLiveOut. + if (FirstJoin) + BBLiveIn = PredLiveOut->second; + else + BBLiveIn = joinBlockInfo(std::move(BBLiveIn), PredLiveOut->second); + FirstJoin = false; + } + + auto CurrentLiveInEntry = LiveIn.find(&BB); + // Check if there isn't an entry, or there is but the LiveIn set has changed + // (expensive check). + if (CurrentLiveInEntry == LiveIn.end() || + BBLiveIn != CurrentLiveInEntry->second) { + LiveIn[&BB] = std::move(BBLiveIn); + // A change has occured. + return true; + } + // No change. + return false; +} + +/// Return true if A fully contains B. +static bool fullyContains(DIExpression::FragmentInfo A, + DIExpression::FragmentInfo B) { + auto ALeft = A.OffsetInBits; + auto BLeft = B.OffsetInBits; + if (BLeft < ALeft) + return false; + + auto ARight = ALeft + A.SizeInBits; + auto BRight = BLeft + B.SizeInBits; + if (BRight > ARight) + return false; + return true; +} + +/// Build a map of {Variable x: Variables y} where all variable fragments +/// contained within the variable fragment x are in set y. This means that +/// y does not contain all overlaps because partial overlaps are excluded. +/// While we're iterating over the function, add single location defs for +/// dbg.declares to \p FnVarLocs. +static AssignmentTrackingLowering::OverlapMap +buildOverlapMapAndRecordDeclares(Function &Fn, + FunctionVarLocsBuilder *FnVarLocs) { + DenseSet Seen; + DenseMap> FragmentMap; + for (auto &BB : Fn) { + for (auto &I : BB) { + if (auto *DDI = dyn_cast(&I)) { + FnVarLocs->addSingleLocVar(DebugVariable(DDI), DDI->getExpression(), + DDI->getDebugLoc(), DDI->getAddress()); + } else if (auto *DII = dyn_cast(&I)) { + DebugVariable DV = DebugVariable(DII); + DebugAggregate DA = {DV.getVariable(), DV.getInlinedAt()}; + if (Seen.insert(DV).second) + FragmentMap[DA].push_back(DV); + } + } + } + + // Sort the fragment map for each DebugAggregate in non-descending + // order of fragment size. Assert no entries are duplicates. + for (auto &Pair : FragmentMap) { + SmallVector &Frags = Pair.second; + std::sort( + Frags.begin(), Frags.end(), [](DebugVariable Next, DebugVariable Elmt) { + assert(!(Elmt.getFragmentOrDefault() == Next.getFragmentOrDefault())); + return Elmt.getFragmentOrDefault().SizeInBits > + Next.getFragmentOrDefault().SizeInBits; + }); + } + + // Build the map. + AssignmentTrackingLowering::OverlapMap Map; + for (auto Pair : FragmentMap) { + auto &Frags = Pair.second; + for (auto It = Frags.begin(), IEnd = Frags.end(); It != IEnd; ++It) { + DIExpression::FragmentInfo Frag = It->getFragmentOrDefault(); + // Find the frags that this is contained within. + // + // Because Frags is sorted by size and none have the same offset and + // size, we know that this frag can only be contained by subsequent + // elements. + SmallVector::iterator OtherIt = It; + ++OtherIt; + VariableID ThisVar = FnVarLocs->insertVariable(*It); + for (; OtherIt != IEnd; ++OtherIt) { + DIExpression::FragmentInfo OtherFrag = OtherIt->getFragmentOrDefault(); + VariableID OtherVar = FnVarLocs->insertVariable(*OtherIt); + if (fullyContains(OtherFrag, Frag)) + Map[OtherVar].push_back(ThisVar); + } + } + } + + return Map; +} + +bool AssignmentTrackingLowering::run(FunctionVarLocsBuilder *FnVarLocsBuilder) { + if (Fn.size() > MaxNumBlocks) { + LLVM_DEBUG(dbgs() << "[AT] Dropping var locs in: " << Fn.getName() + << ": too many blocks (" << Fn.size() << ")\n"); + at::deleteAll(&Fn); + return false; + } + + FnVarLocs = FnVarLocsBuilder; + + // The general structure here is inspired by VarLocBasedImpl.cpp + // (LiveDebugValues). + + // Build the variable fragment overlap map. + // Note that this pass doesn't handle partial overlaps correctly (FWIW + // neither does LiveDebugVariables) because that is difficult to do and + // appears to be rare occurance. + VarContains = buildOverlapMapAndRecordDeclares(Fn, FnVarLocs); + + // Prepare for traversal. + ReversePostOrderTraversal RPOT(&Fn); + std::priority_queue, + std::greater> + Worklist; + std::priority_queue, + std::greater> + Pending; + DenseMap OrderToBB; + DenseMap BBToOrder; + { // Init OrderToBB and BBToOrder. + unsigned int RPONumber = 0; + for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) { + OrderToBB[RPONumber] = *RI; + BBToOrder[*RI] = RPONumber; + Worklist.push(RPONumber); + ++RPONumber; + } + LiveIn.init(RPONumber); + LiveOut.init(RPONumber); + } + + // Perform the traversal. + // + // This is a standard "union of predecessor outs" dataflow problem. To solve + // it, we perform join() and process() using the two worklist method until + // the LiveIn data for each block becomes unchanging. The "proof" that this + // terminates can be put together by looking at the comments around LocKind, + // Assignment, and the various join methods, which show that all the elements + // involved are made up of join-semilattices; LiveIn(n) can only + // monotonically increase in value throughout the dataflow. + // + SmallPtrSet Visited; + while (!Worklist.empty()) { + // We track what is on the pending worklist to avoid inserting the same + // thing twice. + SmallPtrSet OnPending; + LLVM_DEBUG(dbgs() << "Processing Worklist\n"); + while (!Worklist.empty()) { + BasicBlock *BB = OrderToBB[Worklist.top()]; + LLVM_DEBUG(dbgs() << "\nPop BB " << BB->getName() << "\n"); + Worklist.pop(); + bool InChanged = join(*BB, Visited); + // Always consider LiveIn changed on the first visit. + InChanged |= Visited.insert(BB).second; + if (InChanged) { + LLVM_DEBUG(dbgs() << BB->getName() << " has new InLocs, process it\n"); + // Mutate a copy of LiveIn while processing BB. After calling process + // LiveSet is the LiveOut set for BB. + BlockInfo LiveSet = LiveIn[BB]; + + // Process the instructions in the block. + process(*BB, &LiveSet); + + // Relatively expensive check: has anything changed in LiveOut for BB? + if (LiveOut[BB] != LiveSet) { + LLVM_DEBUG(dbgs() << BB->getName() + << " has new OutLocs, add succs to worklist: [ "); + LiveOut[BB] = std::move(LiveSet); + for (auto I = succ_begin(BB), E = succ_end(BB); I != E; I++) { + if (OnPending.insert(*I).second) { + LLVM_DEBUG(dbgs() << I->getName() << " "); + Pending.push(BBToOrder[*I]); + } + } + LLVM_DEBUG(dbgs() << "]\n"); + } + } + } + Worklist.swap(Pending); + // At this point, pending must be empty, since it was just the empty + // worklist + assert(Pending.empty() && "Pending should be empty"); + } + + // That's the hard part over. Now we just have some admin to do. + + // Record whether we inserted any intrinsics. + bool InsertedAnyIntrinsics = false; + + // Identify and add defs for single location variables. + // + // Go through all of the defs that we plan to add. If the aggregate variable + // it's a part of is not in the NotAlwaysStackHomed set we can emit a single + // location def and omit the rest. Add an entry to AlwaysStackHomed so that + // we can identify those uneeded defs later. + DenseSet AlwaysStackHomed; + for (const auto &Pair : InsertBeforeMap) { + const auto &Vec = Pair.second; + for (VarLocInfo VarLoc : Vec) { + DebugVariable Var = FnVarLocs->getVariable(VarLoc.VariableID); + DebugAggregate Aggr{Var.getVariable(), Var.getInlinedAt()}; + + // Skip this Var if it's not always stack homed. + if (NotAlwaysStackHomed.contains(Aggr)) + continue; + + // Skip complex cases such as when different fragments of a variable have + // been split into different allocas. Skipping in this case means falling + // back to using a list of defs (which could reduce coverage, but is no + // less correct). + bool Simple = + VarLoc.Expr->getNumElements() == 1 && VarLoc.Expr->startsWithDeref(); + if (!Simple) { + NotAlwaysStackHomed.insert(Aggr); + continue; + } + + // All source assignments to this variable remain and all stores to any + // part of the variable store to the same address (with varying + // offsets). We can just emit a single location for the whole variable. + // + // Unless we've already done so, create the single location def now. + if (AlwaysStackHomed.insert(Aggr).second) { + assert(isa(VarLoc.V)); + // TODO: When more complex cases are handled VarLoc.Expr should be + // built appropriately rather than always using an empty DIExpression. + // The assert below is a reminder. + assert(Simple); + VarLoc.Expr = DIExpression::get(Fn.getContext(), None); + DebugVariable Var = FnVarLocs->getVariable(VarLoc.VariableID); + FnVarLocs->addSingleLocVar(Var, VarLoc.Expr, VarLoc.DL, VarLoc.V); + InsertedAnyIntrinsics = true; + } + } + } + + // Insert the other DEFs. + for (const auto &[InsertBefore, Vec] : InsertBeforeMap) { + SmallVector NewDefs; + for (const VarLocInfo &VarLoc : Vec) { + DebugVariable Var = FnVarLocs->getVariable(VarLoc.VariableID); + DebugAggregate Aggr{Var.getVariable(), Var.getInlinedAt()}; + // If this variable is always stack homed then we have already inserted a + // dbg.declare and deleted this dbg.value. + if (AlwaysStackHomed.contains(Aggr)) + continue; + NewDefs.push_back(VarLoc); + InsertedAnyIntrinsics = true; + } + + FnVarLocs->setWedge(InsertBefore, std::move(NewDefs)); + } + + InsertedAnyIntrinsics |= emitPromotedVarLocs(FnVarLocs); + + return InsertedAnyIntrinsics; +} + +bool AssignmentTrackingLowering::emitPromotedVarLocs( + FunctionVarLocsBuilder *FnVarLocs) { + bool InsertedAnyIntrinsics = false; + // Go through every block, translating debug intrinsics for fully promoted + // variables into FnVarLocs location defs. No analysis required for these. + for (auto &BB : Fn) { + for (auto &I : BB) { + // Skip instructions other than dbg.values and dbg.assigns. + auto *DVI = dyn_cast(&I); + if (!DVI) + continue; + // Skip variables that haven't been promoted - we've dealt with those + // already. + if (VarsWithStackSlot->contains(getAggregate(DVI))) + continue; + // Wrapper to get a single value (or undef) from DVI. + auto GetValue = [DVI]() -> Value * { + // Conditions for undef: Any operand undef, zero operands or single + // operand is nullptr. We also can't handle variadic DIExpressions yet. + // Some of those conditions don't have a type we can pick for + // undef. Use i32. + if (DVI->isUndef() || DVI->getValue() == nullptr || DVI->hasArgList()) + return UndefValue::get(Type::getInt32Ty(DVI->getContext())); + return DVI->getValue(); + }; + Instruction *InsertBefore = I.getNextNode(); + assert(InsertBefore && "Unexpected: debug intrinsics after a terminator"); + FnVarLocs->addVarLoc(InsertBefore, DebugVariable(DVI), + DVI->getExpression(), DVI->getDebugLoc(), + GetValue()); + InsertedAnyIntrinsics = true; + } + } + return InsertedAnyIntrinsics; +} + +static DenseSet findVarsWithStackSlot(Function &Fn) { + DenseSet Result; + for (auto &BB : Fn) { + for (auto &I : BB) { + // Any variable linked to an instruction is considered + // interesting. Ideally we only need to check Allocas, however, a + // DIAssignID might get dropped from an alloca but not stores. In that + // case, we need to consider the variable interesting for NFC behaviour + // with this change. TODO: Consider only looking at allocas. + for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(&I)) { + Result.insert({DAI->getVariable(), DAI->getDebugLoc().getInlinedAt()}); + } + } + } + return Result; +} + +static void analyzeFunction(Function &Fn, const DataLayout &Layout, + FunctionVarLocsBuilder *FnVarLocs) { + // The analysis will generate location definitions for all variables, but we + // only need to perform a dataflow on the set of variables which have a stack + // slot. Find those now. + DenseSet VarsWithStackSlot = findVarsWithStackSlot(Fn); + AssignmentTrackingLowering Pass(Fn, Layout, &VarsWithStackSlot); + Pass.run(FnVarLocs); +} + +bool AssignmentTrackingAnalysis::runOnFunction(Function &F) { + LLVM_DEBUG(dbgs() << "AssignmentTrackingAnalysis run on " << F.getName() + << "\n"); + auto DL = std::make_unique(F.getParent()); + + // Clear previous results. + Results->clear(); + + FunctionVarLocsBuilder Builder; + analyzeFunction(F, *DL.get(), &Builder); + + // Save these results. + Results->init(Builder); + + if (PrintResults && isFunctionInPrintList(F.getName())) + Results->print(errs(), F); + + // Return false because this pass does not modify the function. + return false; +} + +AssignmentTrackingAnalysis::AssignmentTrackingAnalysis() + : FunctionPass(ID), Results(std::make_unique()) {} + +char AssignmentTrackingAnalysis::ID = 0; + +INITIALIZE_PASS(AssignmentTrackingAnalysis, DEBUG_TYPE, + "Assignment Tracking Analysis", false, true) Index: llvm/lib/CodeGen/CMakeLists.txt =================================================================== --- llvm/lib/CodeGen/CMakeLists.txt +++ llvm/lib/CodeGen/CMakeLists.txt @@ -26,6 +26,7 @@ AggressiveAntiDepBreaker.cpp AllocationOrder.cpp Analysis.cpp + AssignmentTrackingAnalysis.cpp AtomicExpandPass.cpp BasicTargetTransformInfo.cpp BranchFolding.cpp Index: llvm/lib/CodeGen/CodeGen.cpp =================================================================== --- llvm/lib/CodeGen/CodeGen.cpp +++ llvm/lib/CodeGen/CodeGen.cpp @@ -19,6 +19,7 @@ /// initializeCodeGen - Initialize all passes linked into the CodeGen library. void llvm::initializeCodeGen(PassRegistry &Registry) { + initializeAssignmentTrackingAnalysisPass(Registry); initializeAtomicExpandPass(Registry); initializeBasicBlockSectionsPass(Registry); initializeBranchFolderPassPass(Registry);