diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -66,6 +66,7 @@
 class MCTargetOptions;
 class MDNode;
 class Module;
+class PseudoProbeHandler;
 class raw_ostream;
 class StackMaps;
 class StringRef;
@@ -206,6 +207,10 @@
   /// If the target supports dwarf debug info, this pointer is non-null.
   DwarfDebug *DD = nullptr;
 
+  /// A handler that supports pseudo probe emission with embedded inline
+  /// context.
+  PseudoProbeHandler *PP = nullptr;
+
   /// If the current module uses dwarf CFI annotations strictly for debugging.
   bool isCFIMoveForDebugging = false;
 
@@ -360,6 +365,8 @@
 
   void emitBBAddrMapSection(const MachineFunction &MF);
 
+  void emitPseudoProbe(const MachineInstr &MI);
+
   void emitRemarksSection(remarks::RemarkStreamer &RS);
 
   enum CFIMoveType { CFI_M_None, CFI_M_EH, CFI_M_Debug };
diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h
--- a/llvm/include/llvm/IR/MDBuilder.h
+++ b/llvm/include/llvm/IR/MDBuilder.h
@@ -76,6 +76,9 @@
   /// Return metadata containing the section prefix for a function.
   MDNode *createFunctionSectionPrefix(StringRef Prefix);
 
+  /// Return metadata containing the pseudo probe descriptor for a function.
+  MDNode *createPseudoProbeDesc(uint64_t GUID, uint64_t Hash, Function *F);
+
   //===------------------------------------------------------------------===//
   // Range metadata.
   //===------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h
--- a/llvm/include/llvm/IR/PseudoProbe.h
+++ b/llvm/include/llvm/IR/PseudoProbe.h
@@ -18,6 +18,8 @@
 
 namespace llvm {
 
+constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc";
+
 enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall };
 
 struct PseudoProbeDwarfDiscriminator {
diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h
--- a/llvm/include/llvm/MC/MCAssembler.h
+++ b/llvm/include/llvm/MC/MCAssembler.h
@@ -202,6 +202,7 @@
   bool relaxCVInlineLineTable(MCAsmLayout &Layout,
                               MCCVInlineLineTableFragment &DF);
   bool relaxCVDefRange(MCAsmLayout &Layout, MCCVDefRangeFragment &DF);
+  bool relaxPseudoProbeAddr(MCAsmLayout &Layout, MCPseudoProbeAddrFragment &DF);
 
   /// finishLayout - Finalize a layout, including fragment lowering.
   void finishLayout(MCAsmLayout &Layout);
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -22,6 +22,7 @@
 #include "llvm/BinaryFormat/XCOFF.h"
 #include "llvm/MC/MCAsmMacro.h"
 #include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCPseudoProbe.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCTargetOptions.h"
 #include "llvm/MC/SectionKind.h"
@@ -199,6 +200,9 @@
     /// The Compile Unit ID that we are currently processing.
     unsigned DwarfCompileUnitID = 0;
 
+    /// A collection of MCPseudoProbe in the current module
+    MCPseudoProbeTable PseudoProbeTable;
+
     // Sections are differentiated by the quadruple (section_name, group_name,
     // unique_id, link_to_symbol_name). Sections sharing the same quadruple are
     // combined into one section.
@@ -749,6 +753,8 @@
     }
 
     void undefineMacro(StringRef Name) { MacroMap.erase(Name); }
+
+    MCPseudoProbeTable &getMCPseudoProbeTable() { return PseudoProbeTable; }
   };
 
 } // end namespace llvm
diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h
--- a/llvm/include/llvm/MC/MCFragment.h
+++ b/llvm/include/llvm/MC/MCFragment.h
@@ -47,6 +47,7 @@
     FT_SymbolId,
     FT_CVInlineLines,
     FT_CVDefRange,
+    FT_PseudoProbe,
     FT_Dummy
   };
 
@@ -147,6 +148,7 @@
     case MCFragment::FT_Data:
     case MCFragment::FT_Dwarf:
     case MCFragment::FT_DwarfFrame:
+    case MCFragment::FT_PseudoProbe:
       return true;
     }
   }
@@ -591,6 +593,23 @@
     return F->getKind() == MCFragment::FT_BoundaryAlign;
   }
 };
+
+class MCPseudoProbeAddrFragment : public MCEncodedFragmentWithFixups<8, 1> {
+  /// The expression for the difference of the two symbols that
+  /// make up the address delta between two .pseudoprobe directives.
+  const MCExpr *AddrDelta;
+
+public:
+  MCPseudoProbeAddrFragment(const MCExpr *AddrDelta, MCSection *Sec = nullptr)
+      : MCEncodedFragmentWithFixups<8, 1>(FT_PseudoProbe, false, Sec),
+        AddrDelta(AddrDelta) {}
+
+  const MCExpr &getAddrDelta() const { return *AddrDelta; }
+
+  static bool classof(const MCFragment *F) {
+    return F->getKind() == MCFragment::FT_PseudoProbe;
+  }
+};
 } // end namespace llvm
 
 #endif // LLVM_MC_MCFRAGMENT_H
diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h
--- a/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -174,6 +174,10 @@
   /// Section containing metadata on function stack sizes.
   MCSection *StackSizesSection = nullptr;
 
+  /// Section for pseudo probe information used by AutoFDO
+  MCSection *PseudoProbeSection = nullptr;
+  MCSection *PseudoProbeDescSection = nullptr;
+
   // ELF specific sections.
   MCSection *DataRelROSection = nullptr;
   MCSection *MergeableConst4Section = nullptr;
@@ -340,6 +344,10 @@
 
   MCSection *getBBAddrMapSection(const MCSection &TextSec) const;
 
+  MCSection *getPseudoProbeSection(const MCSection *TextSec) const;
+
+  MCSection *getPseudoProbeDescSection(StringRef FuncName) const;
+
   // ELF specific sections.
   MCSection *getDataRelROSection() const { return DataRelROSection; }
   const MCSection *getMergeableConst4Section() const {
diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/MC/MCPseudoProbe.h
@@ -0,0 +1,178 @@
+//===- MCPseudoProbe.h - Pseudo probe encoding support ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MCPseudoProbe to support the pseudo
+// probe encoding for AutoFDO. Pseudo probes together with their inline context
+// are encoded in a DFS recursive way in the .pseudoprobe sections. For each
+// .pseudoprobe section, the encoded binary data consist of a single or multiple
+// function records each for one outlined function. A function record has the
+// following format :
+//
+// FUNCTION BODY (one for each outlined function present in the text section)
+//    GUID (uint64)
+//        GUID of the function
+//    NPROBES (ULEB128)
+//        Number of probes originating from this function.
+//    NUM_INLINED_FUNCTIONS (ULEB128)
+//        Number of callees inlined into this function, aka number of
+//        first-level inlinees
+//    PROBE RECORDS
+//        A list of NPROBES entries. Each entry contains:
+//          INDEX (ULEB128)
+//          TYPE (uint4)
+//            0 - block probe, 1 - indirect call, 2 - direct call
+//          ATTRIBUTE (uint3)
+//            reserved
+//          ADDRESS_TYPE (uint1)
+//            0 - code address, 1 - address delta
+//          CODE_ADDRESS (uint64 or ULEB128)
+//            code address or address delta, depending on ADDRESS_TYPE
+//    INLINED FUNCTION RECORDS
+//        A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
+//        callees.  Each record contains:
+//          INLINE SITE
+//            GUID of the inlinee (uint64)
+//            ID of the callsite probe (ULEB128)
+//          FUNCTION BODY
+//            A FUNCTION BODY entry describing the inlined function.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCPSEUDOPROBE_H
+#define LLVM_MC_MCPSEUDOPROBE_H
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/MC/MCSection.h"
+#include <functional>
+#include <map>
+#include <vector>
+
+namespace llvm {
+
+class MCStreamer;
+class MCSymbol;
+class MCObjectStreamer;
+
+enum class MCPseudoProbeFlag {
+  // If set, indicates that the probe is encoded as an address delta
+  // instead of a real code address.
+  AddressDelta = 0x1,
+};
+
+/// Instances of this class represent a single pseudo probe table entry. A
+/// probe is created while a machine instruction is being assembled and takes
+/// its address from a temporary label created at the current address in the
+/// current section.
+class MCPseudoProbe {
+  MCSymbol *Label;    // Temporary label marking the probe's address.
+  uint64_t Guid;
+  uint64_t Index;
+  uint8_t Type;       // Stored in 8 bits; range-checked by the constructor.
+  uint8_t Attributes; // Stored in 8 bits; range-checked by the constructor.
+
+public:
+  MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type,
+                uint64_t Attributes)
+      : Label(Label), Guid(Guid), Index(Index), Type(Type),
+        Attributes(Attributes) {
+    assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8");
+    assert(Attributes <= 0xFF &&
+           "Probe attributes too big to encode, exceeding 2^8");
+  }
+
+  MCSymbol *getLabel() const { return Label; }
+
+  uint64_t getGuid() const { return Guid; }
+
+  uint64_t getIndex() const { return Index; }
+
+  uint8_t getType() const { return Type; }
+
+  uint8_t getAttributes() const { return Attributes; }
+
+  void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const;
+};
+
+// An inline frame has the form <Guid, ProbeID>
+using InlineSite = std::tuple<uint64_t, uint32_t>;
+using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
+
+// A trie-based data structure to group probes by inline stack. One tree is
+// allocated per standalone .text section, with a fake instance as its root.
+// A real instance of this class is created for each function, either an
+// outlined function that has code in .text section or an inlined function.
+class MCPseudoProbeInlineTree {
+  // GUID of this node's function; 0 (also the default) marks the fake root.
+  uint64_t Guid = 0;
+  // Set of probes that come with the function.
+  std::vector<MCPseudoProbe> Probes;
+  // Use std::map for a deterministic output.
+  std::map<InlineSite, MCPseudoProbeInlineTree *> Inlinees;
+
+  // Root node has a GUID 0.
+  bool isRoot() const { return Guid == 0; }
+  MCPseudoProbeInlineTree *getOrAddNode(InlineSite Site);
+
+public:
+  MCPseudoProbeInlineTree() = default;
+  MCPseudoProbeInlineTree(uint64_t Guid) : Guid(Guid) {}
+  ~MCPseudoProbeInlineTree();
+  void addPseudoProbe(const MCPseudoProbe &Probe,
+                      const MCPseudoProbeInlineStack &InlineStack);
+  void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *&LastProbe);
+};
+
+/// Instances of this class represent the pseudo probes inserted into a compile
+/// unit.
+class MCPseudoProbeSection {
+public:
+  void addPseudoProbe(MCSection *Sec, const MCPseudoProbe &Probe,
+                      const MCPseudoProbeInlineStack &InlineStack) {
+    MCProbeDivisions[Sec].addPseudoProbe(Probe, InlineStack);
+  }
+
+  // TODO: Sort by getOrdinal to ensure a deterministic section order
+  using MCProbeDivisionMap = std::map<MCSection *, MCPseudoProbeInlineTree>;
+
+private:
+  // A collection of MCPseudoProbe for each text section. The MCPseudoProbes
+  // are grouped by GUID of the functions where they are from and will be
+  // encoded by groups. In the comdat scenario where a text section really only
+  // contains the code of a function solely, the probes associated with a comdat
+  // function are still grouped by GUIDs due to inlining that can bring probes
+  // from different functions into one function.
+  MCProbeDivisionMap MCProbeDivisions;
+
+public:
+  const MCProbeDivisionMap &getMCProbes() const { return MCProbeDivisions; }
+
+  bool empty() const { return MCProbeDivisions.empty(); }
+
+  void emit(MCObjectStreamer *MCOS);
+};
+
+class MCPseudoProbeTable {
+  // A collection of MCPseudoProbe in the current module grouped by text
+  // sections. MCPseudoProbes will be encoded into a corresponding
+  // .pseudoprobe section. With functions emitted as separate comdats,
+  // a text section really only contains the code of a function solely, and the
+  // probes associated with the text section will be emitted into a standalone
+  // .pseudoprobe section that shares the same comdat group with the function.
+  MCPseudoProbeSection MCProbeSections;
+
+public:
+  static void emit(MCObjectStreamer *MCOS);
+
+  MCPseudoProbeSection &getProbeSections() { return MCProbeSections; }
+
+#ifndef NDEBUG
+  static int DdgPrintIndent;
+#endif
+};
+} // end namespace llvm
+
+#endif // LLVM_MC_MCPSEUDOPROBE_H
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -21,6 +21,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCDirectives.h"
 #include "llvm/MC/MCLinkerOptimizationHint.h"
+#include "llvm/MC/MCPseudoProbe.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCWinEH.h"
 #include "llvm/Support/Error.h"
@@ -1049,6 +1050,11 @@
   /// Emit the given \p Instruction into the current section.
   virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI);
 
+  /// Emit a pseudo probe into the current section.
+  virtual void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
+                               uint64_t Attr,
+                               const MCPseudoProbeInlineStack &InlineStack);
+
   /// Set the bundle alignment mode from now on in the section.
   /// The argument is the power of 2 to which the alignment is set. The
   /// value 0 means turn the bundle alignment off.
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -64,7 +64,7 @@
     assert(this->Action != NoAction || this->CSAction != NoCSAction ||
            this->DebugInfoForProfiling || this->PseudoProbeForProfiling);
 
-    // Pseudo probe emission does work with -fdebug-info-for-profiling since
+    // Pseudo probe emission does not work with -fdebug-info-for-profiling since
     // they both use the discriminator field of debug lines but for different
     // purposes.
     if (this->DebugInfoForProfiling && this->PseudoProbeForProfiling) {
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -37,18 +37,27 @@
 class SampleProfileProber {
 public:
   // Give an empty module id when the prober is not used for instrumentation.
-  SampleProfileProber(Function &F);
+  SampleProfileProber(Function &F, const std::string &CurModuleUniqueId);
   void instrumentOneFunc(Function &F, TargetMachine *TM);
 
 private:
   Function *getFunction() const { return F; }
+  uint64_t getFunctionHash() const { return FunctionHash; }
   uint32_t getBlockId(const BasicBlock *BB) const;
   uint32_t getCallsiteId(const Instruction *Call) const;
+  void computeCFGHash();
   void computeProbeIdForBlocks();
   void computeProbeIdForCallsites();
 
   Function *F;
 
+  /// The current module ID that is used to name a static object as a comdat
+  /// group.
+  std::string CurModuleUniqueId;
+
+  /// A CFG hash code used to identify function code changes.
+  uint64_t FunctionHash;
+
   /// Map basic blocks to the their pseudo probe ids.
   BlockIdMap BlockProbeIds;
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -14,6 +14,7 @@
 #include "CodeViewDebug.h"
 #include "DwarfDebug.h"
 #include "DwarfException.h"
+#include "PseudoProbePrinter.h"
 #include "WasmException.h"
 #include "WinCFGuard.h"
 #include "WinException.h"
@@ -77,6 +78,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/PseudoProbe.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/MC/MCAsmInfo.h"
@@ -333,6 +335,9 @@
     }
   }
 
+  if (M.getNamedMetadata(PseudoProbeDescMetadataName))
+    PP = new PseudoProbeHandler(this, &M);
+
   switch (MAI->getExceptionHandlingType()) {
   case ExceptionHandling::SjLj:
   case ExceptionHandling::DwarfCFI:
@@ -1086,6 +1091,15 @@
   OutStreamer->PopSection();
 }
 
+void AsmPrinter::emitPseudoProbe(const MachineInstr &MI) {
+  // PP is null when the module has no llvm.pseudo_probe_desc metadata.
+  if (!PP)
+    return;
+  // PSEUDO_PROBE operands are, in order: GUID, Index, Type, Attr.
+  const auto Imm = [&MI](unsigned I) { return MI.getOperand(I).getImm(); };
+  PP->emitPseudoProbe(Imm(0), Imm(1), Imm(2), Imm(3), MI.getDebugLoc());
+}
+
 void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {
   if (!MF.getTarget().Options.EmitStackSizeSection)
     return;
@@ -1219,6 +1233,9 @@
       case TargetOpcode::KILL:
         if (isVerbose()) emitKill(&MI, *this);
         break;
+      case TargetOpcode::PSEUDO_PROBE:
+        emitPseudoProbe(MI);
+        break;
       default:
         emitInstruction(&MI);
         if (CanDoExtraAnalysis) {
diff --git a/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt b/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt
--- a/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -21,6 +21,7 @@
   EHStreamer.cpp
   ErlangGCPrinter.cpp
   OcamlGCPrinter.cpp
+  PseudoProbePrinter.cpp
   WinCFGuard.cpp
   WinException.cpp
   CodeViewDebug.cpp
diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
@@ -0,0 +1,41 @@
+//===- PseudoProbePrinter.h - Pseudo probe encoding support -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing pseudo probe info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_PSEUDOPROBEPRINTER_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_PSEUDOPROBEPRINTER_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class MCStreamer;
+class Module;
+class DILocation;
+
+class PseudoProbeHandler {
+  // Target of pseudo probe emission.
+  AsmPrinter *Asm;
+  // Name to GUID map
+  DenseMap<StringRef, uint64_t> Names;
+
+public:
+  PseudoProbeHandler(AsmPrinter *A, Module *M);
+  void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
+                       uint64_t Attr, const DILocation *DebugLoc);
+#ifndef NDEBUG
+  void dump() const;
+#endif
+};
+
+} // namespace llvm
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_PSEUDOPROBEPRINTER_H
diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
@@ -0,0 +1,82 @@
+//===- llvm/CodeGen/PseudoProbePrinter.cpp - Pseudo Probe Emission -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing pseudo probe info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PseudoProbePrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PseudoProbe.h"
+#include "llvm/MC/MCPseudoProbe.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pseudoprobe"
+
+PseudoProbeHandler::PseudoProbeHandler(AsmPrinter *A, Module *M) : Asm(A) {
+  NamedMDNode *FuncInfo = M->getNamedMetadata(PseudoProbeDescMetadataName);
+  assert(FuncInfo && "Pseudo probe descriptors are missing");
+  // Build the name -> GUID map from the llvm.pseudo_probe_desc descriptors.
+  for (const auto *Operand : FuncInfo->operands()) {
+    const auto *MD = cast<MDNode>(Operand);
+    // The GUID operand must be a ConstantInt; extract<> asserts that.
+    auto GUID =
+        mdconst::extract<ConstantInt>(MD->getOperand(0))->getZExtValue();
+    auto Name = cast<MDString>(MD->getOperand(2))->getString();
+    // In LTO mode we may see the same name with different GUIDs, because
+    // same-named static functions from other modules can be inlined into this
+    // one. Profiles with the same name are merged regardless of which function
+    // they were collected on, so we simply keep the last <Name, GUID> pair and
+    // all probes of the same-named collection end up in a single profile.
+    Names[Name] = GUID;
+  }
+
+  LLVM_DEBUG(dump());
+}
+
+void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
+                                         uint64_t Type, uint64_t Attr,
+                                         const DILocation *DebugLoc) {
+  // Gather all the inlined-at nodes.
+  // When it's done ReversedInlineStack looks like ([66, B], [88, A])
+  // which means, Function A inlines function B at callsite with a probe id 88,
+  // and B inlines C at probe 66 where C is represented by Guid.
+  SmallVector<InlineSite, 8> ReversedInlineStack;
+  auto *InlinedAt = DebugLoc ? DebugLoc->getInlinedAt() : nullptr;
+  while (InlinedAt) {
+    const DISubprogram *SP = InlinedAt->getScope()->getSubprogram();
+    // Use linkage name for C++ if possible.
+    auto Name = SP->getLinkageName();
+    if (Name.empty())
+      Name = SP->getName();
+    assert(Names.count(Name) && "Pseudo probe descriptor missing for function");
+    uint64_t CallerGuid = Names.lookup(Name); // Unlike operator[], no insert.
+    uint64_t CallerProbeId = PseudoProbeDwarfDiscriminator::extractProbeIndex(
+        InlinedAt->getDiscriminator());
+    ReversedInlineStack.push_back({CallerGuid, CallerProbeId});
+    InlinedAt = InlinedAt->getInlinedAt();
+  }
+
+  SmallVector<InlineSite, 8> InlineStack(ReversedInlineStack.rbegin(),
+                                         ReversedInlineStack.rend());
+  Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack);
+}
+
+#ifndef NDEBUG
+void PseudoProbeHandler::dump() const {
+  dbgs() << "\n=============================\n"
+         << "\nFunction Name to GUID map:\n"
+         << "\n=============================\n";
+  for (const auto &Entry : Names) // One "Func ... GUID ..." line per entry.
+    dbgs() << "Func: " << Entry.first << "   GUID: " << Entry.second << "\n";
+}
+#endif
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -40,6 +40,7 @@
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/PseudoProbe.h"
 #include "llvm/IR/Type.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
@@ -316,6 +317,29 @@
     }
   }
 
+  if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
+    // Emit a descriptor for every function including functions that have an
+    // available external linkage. We may not want this for imported functions
+    // that have code in another ThinLTO module but we don't have a good way to
+    // tell them apart from inline functions defined in header files. Therefore
+    // we put each descriptor in a separate comdat section and rely on the
+    // linker to deduplicate.
+    for (const auto *Operand : FuncInfo->operands()) {
+      const auto *MD = cast<MDNode>(Operand);
+      auto *GUID = mdconst::dyn_extract<ConstantInt>(MD->getOperand(0));
+      auto *Hash = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1));
+      auto *Name = cast<MDString>(MD->getOperand(2));
+      auto *S = C.getObjectFileInfo()->getPseudoProbeDescSection(
+          TM->getFunctionSections() ? Name->getString() : StringRef());
+
+      Streamer.SwitchSection(S);
+      Streamer.emitInt64(GUID->getZExtValue());
+      Streamer.emitInt64(Hash->getZExtValue());
+      Streamer.emitULEB128IntValue(Name->getString().size());
+      Streamer.emitBytes(Name->getString());
+    }
+  }
+
   unsigned Version = 0;
   unsigned Flags = 0;
   StringRef Section;
diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp
--- a/llvm/lib/IR/MDBuilder.cpp
+++ b/llvm/lib/IR/MDBuilder.cpp
@@ -305,3 +305,13 @@
   };
   return MDNode::get(Context, Vals);
 }
+
+MDNode *MDBuilder::createPseudoProbeDesc(uint64_t GUID, uint64_t Hash,
+                                         Function *F) {
+  // Descriptor layout: {i64 GUID, i64 Hash, !"function name"}.
+  auto *I64 = Type::getInt64Ty(Context);
+  Metadata *Fields[] = {createConstant(ConstantInt::get(I64, GUID)),
+                        createConstant(ConstantInt::get(I64, Hash)),
+                        createString(F->getName())};
+  return MDNode::get(Context, Fields);
+}
diff --git a/llvm/lib/MC/CMakeLists.txt b/llvm/lib/MC/CMakeLists.txt
--- a/llvm/lib/MC/CMakeLists.txt
+++ b/llvm/lib/MC/CMakeLists.txt
@@ -32,6 +32,7 @@
   MCObjectFileInfo.cpp
   MCObjectStreamer.cpp
   MCObjectWriter.cpp
+  MCPseudoProbe.cpp
   MCRegisterInfo.cpp
   MCSchedule.cpp
   MCSection.cpp
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -24,6 +24,7 @@
 #include "llvm/MC/MCInstPrinter.h"
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCPseudoProbe.h"
 #include "llvm/MC/MCRegister.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSectionMachO.h"
@@ -350,6 +351,10 @@
 
   void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
 
+  void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
+                       uint64_t Attr,
+                       const MCPseudoProbeInlineStack &InlineStack) override;
+
   void emitBundleAlignMode(unsigned AlignPow2) override;
   void emitBundleLock(bool AlignToEnd) override;
   void emitBundleUnlock() override;
@@ -2122,6 +2127,18 @@
   EmitEOL();
 }
 
+void MCAsmStreamer::emitPseudoProbe(
+    uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr,
+    const MCPseudoProbeInlineStack &InlineStack) {
+  OS << "\t.pseudoprobe\t" << Guid << " " << Index << " " << Type << " "
+     << Attr;
+  // Append one " @ <Guid>:<ProbeId>" per inline site, in stack order, e.g.
+  //  @ GUIDmain:3 @ GUIDCaller:1 @ GUIDDirectCaller:11
+  for (const auto &Site : InlineStack)
+    OS << " @ " << std::get<0>(Site) << ":" << std::get<1>(Site);
+  EmitEOL();
+}
+
 void MCAsmStreamer::emitBundleAlignMode(unsigned AlignPow2) {
   OS << "\t.bundle_align_mode " << AlignPow2;
   EmitEOL();
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -383,6 +383,8 @@
     return cast<MCCVInlineLineTableFragment>(F).getContents().size();
   case MCFragment::FT_CVDefRange:
     return cast<MCCVDefRangeFragment>(F).getContents().size();
+  case MCFragment::FT_PseudoProbe:
+    return cast<MCPseudoProbeAddrFragment>(F).getContents().size();
   case MCFragment::FT_Dummy:
     llvm_unreachable("Should not have been added");
   }
@@ -704,6 +706,11 @@
     OS << DRF.getContents();
     break;
   }
+  case MCFragment::FT_PseudoProbe: {
+    const MCPseudoProbeAddrFragment &PF = cast<MCPseudoProbeAddrFragment>(F);
+    OS << PF.getContents();
+    break;
+  }
   case MCFragment::FT_Dummy:
     llvm_unreachable("Should not have been added");
   }
@@ -915,6 +922,12 @@
         Contents = DF.getContents();
         break;
       }
+      case MCFragment::FT_PseudoProbe: {
+        MCPseudoProbeAddrFragment &PF = cast<MCPseudoProbeAddrFragment>(Frag);
+        Fixups = PF.getFixups();
+        Contents = PF.getContents();
+        break;
+      }
       }
       for (const MCFixup &Fixup : Fixups) {
         uint64_t FixedValue;
@@ -1170,6 +1183,27 @@
   return OldSize != F.getContents().size();
 }
 
+bool MCAssembler::relaxPseudoProbeAddr(MCAsmLayout &Layout,
+                                       MCPseudoProbeAddrFragment &PF) {
+  uint64_t OldSize = PF.getContents().size();
+  int64_t AddrDelta;
+  bool Abs = PF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout);
+  assert(Abs && "We created a pseudo probe with an invalid expression");
+  (void)Abs;
+  SmallVectorImpl<char> &Data = PF.getContents();
+  Data.clear();
+  raw_svector_ostream OSE(Data);
+  PF.getFixups().clear();
+
+  // Relocations should not be needed in general, except on RISC-V, which is
+  // not targeted for now.
+  assert(!getBackend().requiresDiffExpressionRelocations() &&
+         "cannot relax relocations");
+  // AddrDelta is signed; pad to OldSize so the encoding never shrinks.
+  encodeSLEB128(AddrDelta, OSE, OldSize);
+  return OldSize != Data.size();
+}
+
 bool MCAssembler::relaxFragment(MCAsmLayout &Layout, MCFragment &F) {
   switch(F.getKind()) {
   default:
@@ -1191,6 +1225,8 @@
     return relaxCVInlineLineTable(Layout, cast<MCCVInlineLineTableFragment>(F));
   case MCFragment::FT_CVDefRange:
     return relaxCVDefRange(Layout, cast<MCCVDefRangeFragment>(F));
+  case MCFragment::FT_PseudoProbe:
+    return relaxPseudoProbeAddr(Layout, cast<MCPseudoProbeAddrFragment>(F));
   }
 }
 
diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp
--- a/llvm/lib/MC/MCFragment.cpp
+++ b/llvm/lib/MC/MCFragment.cpp
@@ -309,6 +309,9 @@
     case FT_CVDefRange:
       delete cast<MCCVDefRangeFragment>(this);
       return;
+    case FT_PseudoProbe:
+      delete cast<MCPseudoProbeAddrFragment>(this);
+      return;
     case FT_Dummy:
       delete cast<MCDummyFragment>(this);
       return;
@@ -351,6 +354,9 @@
   case MCFragment::FT_SymbolId:    OS << "MCSymbolIdFragment"; break;
   case MCFragment::FT_CVInlineLines: OS << "MCCVInlineLineTableFragment"; break;
   case MCFragment::FT_CVDefRange: OS << "MCCVDefRangeTableFragment"; break;
+  case MCFragment::FT_PseudoProbe:
+    OS << "MCPseudoProbe";
+    break;
   case MCFragment::FT_Dummy: OS << "MCDummyFragment"; break;
   }
 
@@ -484,6 +490,12 @@
     }
     break;
   }
+  case MCFragment::FT_PseudoProbe: {
+    const auto *OF = cast<MCPseudoProbeAddrFragment>(this);
+    OS << "\n       ";
+    OS << " AddrDelta:" << OF->getAddrDelta();
+    break;
+  }
   case MCFragment::FT_Dummy:
     break;
   }
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -497,6 +497,10 @@
       Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags);
 
   StackSizesSection = Ctx->getELFSection(".stack_sizes", ELF::SHT_PROGBITS, 0);
+
+  PseudoProbeSection = Ctx->getELFSection(".pseudo_probe", DebugSecType, 0);
+  PseudoProbeDescSection =
+      Ctx->getELFSection(".pseudo_probe_desc", DebugSecType, 0);
 }
 
 void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
@@ -1025,3 +1029,41 @@
                             Flags, 0, GroupName, ElfSec.getUniqueID(),
                             cast<MCSymbolELF>(TextSec.getBeginSymbol()));
 }
+
+MCSection *
+MCObjectFileInfo::getPseudoProbeSection(const MCSection *TextSec) const {
+  if (Env != IsELF)
+    return PseudoProbeSection;
+  const MCSymbol *Comdat =
+      static_cast<const MCSectionELF *>(TextSec)->getGroup();
+  if (!Comdat)
+    return PseudoProbeSection;
+  // Probes of a comdat function go into a probe section of the same group.
+  auto *Base = static_cast<MCSectionELF *>(PseudoProbeSection);
+  return Ctx->getELFSection(Base->getName(), Base->getType(),
+                            Base->getFlags() | ELF::SHF_GROUP,
+                            Base->getEntrySize(), Comdat->getName());
+}
+
+MCSection *
+MCObjectFileInfo::getPseudoProbeDescSection(StringRef FuncName) const {
+  // Without ELF comdat support, or without a function name to key the group
+  // on, all descriptors share the plain descriptor section.
+  if (Env != IsELF || !TT.supportsCOMDAT() || FuncName.empty())
+    return PseudoProbeDescSection;
+
+  // Create a separate comdat group for each function's descriptor so that the
+  // linker can deduplicate it. Duplicates necessarily come from different
+  // translation units, for example from:
+  //  1. Inline functions defined in header files;
+  //  2. ThinLTO imported functions;
+  //  3. Weak-linkage definitions.
+  // The group name is the section name concatenated with the function name so
+  // that descriptor-only groups won't be folded with groups of code.
+  auto *DescSec = static_cast<MCSectionELF *>(PseudoProbeDescSection);
+  const auto GroupFlags = DescSec->getFlags() | ELF::SHF_GROUP;
+  return Ctx->getELFSection(DescSec->getName(), DescSec->getType(),
+                            GroupFlags, DescSec->getEntrySize(),
+                            DescSec->getName() + "_" + FuncName);
+}
+
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -852,6 +852,9 @@
   // Dump out the dwarf file & directory tables and line tables.
   MCDwarfLineTable::Emit(this, getAssembler().getDWARFLinetableParams());
 
+  // Emit pseudo probes for the current module.
+  MCPseudoProbeTable::emit(this);
+
   // Update any remaining pending labels with empty data fragments.
   flushPendingLabels();
 
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -515,6 +515,7 @@
     DK_PRINT,
     DK_ADDRSIG,
     DK_ADDRSIG_SYM,
+    DK_PSEUDO_PROBE,
     DK_END
   };
 
@@ -678,6 +679,9 @@
   // .print <double-quotes-string>
   bool parseDirectivePrint(SMLoc DirectiveLoc);
 
+  // .pseudoprobe
+  bool parseDirectivePseudoProbe();
+
   // Directives to support address-significance tables.
   bool parseDirectiveAddrsig();
   bool parseDirectiveAddrsigSym();
@@ -2202,6 +2206,8 @@
       return parseDirectiveAddrsig();
     case DK_ADDRSIG_SYM:
       return parseDirectiveAddrsigSym();
+    case DK_PSEUDO_PROBE:
+      return parseDirectivePseudoProbe();
     }
 
     return Error(IDLoc, "unknown directive");
@@ -5520,6 +5526,7 @@
   DirectiveKindMap[".print"] = DK_PRINT;
   DirectiveKindMap[".addrsig"] = DK_ADDRSIG;
   DirectiveKindMap[".addrsig_sym"] = DK_ADDRSIG_SYM;
+  DirectiveKindMap[".pseudoprobe"] = DK_PSEUDO_PROBE;
 }
 
 MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
@@ -5775,6 +5782,69 @@
   return false;
 }
 
+/// parseDirectivePseudoProbe
+///  ::= .pseudoprobe GUID INDEX TYPE ATTR { '@' CALLER_GUID ':' CALLER_PROBE }
+///
+/// The four leading integers are mandatory. parseIntToken() reports an error
+/// for a non-integer token, so a missing operand is diagnosed rather than
+/// leaving a local uninitialized before it is handed to the streamer.
+/// Returns true on error, false once the probe has been sent to the streamer.
+bool AsmParser::parseDirectivePseudoProbe() {
+  const char *const BadToken = "unexpected token in '.pseudoprobe' directive";
+
+  int64_t Guid = 0;
+  int64_t Index = 0;
+  int64_t Type = 0;
+  int64_t Attr = 0;
+
+  // Function GUID of the probe.
+  if (parseIntToken(Guid, BadToken))
+    return true;
+  // Probe index.
+  if (parseIntToken(Index, BadToken))
+    return true;
+  // Probe type.
+  if (parseIntToken(Type, BadToken))
+    return true;
+  // Probe attributes.
+  if (parseIntToken(Attr, BadToken))
+    return true;
+
+  // Parse inline stack like @ GUID:11 @ GUID:1 @ GUID:3
+  MCPseudoProbeInlineStack InlineStack;
+
+  while (getLexer().is(AsmToken::At)) {
+    // eat @
+    Lex();
+
+    // The caller GUID and probe id stay optional and default to 0.
+    int64_t CallerGuid = 0;
+    if (getLexer().is(AsmToken::Integer)) {
+      if (parseIntToken(CallerGuid, BadToken))
+        return true;
+    }
+
+    // eat colon
+    if (getLexer().is(AsmToken::Colon))
+      Lex();
+
+    int64_t CallerProbeId = 0;
+    if (getLexer().is(AsmToken::Integer)) {
+      if (parseIntToken(CallerProbeId, BadToken))
+        return true;
+    }
+
+    InlineSite Site = {CallerGuid, CallerProbeId};
+    InlineStack.push_back(Site);
+  }
+
+  if (parseToken(AsmToken::EndOfStatement, BadToken))
+    return true;
+
+  getStreamer().emitPseudoProbe(Guid, Index, Type, Attr, InlineStack);
+  return false;
+}
+
 // We are comparing pointers, but the pointers are relative to a single string.
 // Thus, this should always be deterministic.
 static int rewritesSort(const AsmRewrite *AsmRewriteA,
diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -0,0 +1,213 @@
+//===- lib/MC/MCPseudoProbe.cpp - Pseudo probe encoding support ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCPseudoProbe.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCStreamer.h"
+
+#define DEBUG_TYPE "mcpseudoprobe"
+
+using namespace llvm;
+
+#ifndef NDEBUG
+int MCPseudoProbeTable::DdgPrintIndent = 0;
+#endif
+
+static const MCExpr *buildSymbolDiff(MCObjectStreamer *MCOS, const MCSymbol *A,
+                                     const MCSymbol *B) {
+  // Build the symbolic expression `A - B` from two plain symbol references.
+  MCContext &Ctx = MCOS->getContext();
+  const MCExpr *Minuend =
+      MCSymbolRefExpr::create(A, MCSymbolRefExpr::VK_None, Ctx);
+  const MCExpr *Subtrahend =
+      MCSymbolRefExpr::create(B, MCSymbolRefExpr::VK_None, Ctx);
+  return MCBinaryExpr::create(MCBinaryExpr::Sub, Minuend, Subtrahend, Ctx);
+}
+
+void MCPseudoProbe::emit(MCObjectStreamer *MCOS,
+                         const MCPseudoProbe *LastProbe) const {
+  // Emit the probe index as a ULEB128 value.
+  MCOS->emitULEB128IntValue(Index);
+  // Emit one byte that packs the type, the attributes and the address flag:
+  // Type (bit 0 to 3), with bit 4 to 6 for attributes.
+  // Flag (bit 7, 0 - code address, 1 - address delta). This indicates whether
+  // the following field is a symbolic code address or an address delta.
+  assert(Type <= 0xF && "Probe type too big to encode, exceeding 15");
+  assert(Attributes <= 0x7 &&
+         "Probe attributes too big to encode, exceeding 7");
+  uint8_t PackedType = Type | (Attributes << 4);
+  uint8_t Flag = LastProbe ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0;
+  MCOS->emitInt8(Flag | PackedType);
+
+  if (LastProbe) {
+    // Emit Label - LastProbe: SLEB128 if already constant, else a fragment.
+    const MCExpr *AddrDelta =
+        buildSymbolDiff(MCOS, Label, LastProbe->getLabel());
+    int64_t Delta;
+    if (AddrDelta->evaluateAsAbsolute(Delta, MCOS->getAssemblerPtr())) {
+      MCOS->emitSLEB128IntValue(Delta);
+    } else {
+      MCOS->insert(new MCPseudoProbeAddrFragment(AddrDelta));
+    }
+  } else {
+    // No previous probe: emit Label as a pointer-sized symbolic address.
+    MCOS->emitSymbolValue(
+        Label, MCOS->getContext().getAsmInfo()->getCodePointerSize());
+  }
+
+  LLVM_DEBUG({
+    dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
+    dbgs() << "Probe: " << Index << "\n";
+  });
+}
+
+MCPseudoProbeInlineTree::~MCPseudoProbeInlineTree() {
+  for (auto &SiteAndChild : Inlinees)
+    delete SiteAndChild.second;
+}
+
+MCPseudoProbeInlineTree *
+MCPseudoProbeInlineTree::getOrAddNode(InlineSite Site) {
+  // Reuse the child already registered for this inline site, if any.
+  auto Existing = Inlinees.find(Site);
+  if (Existing != Inlinees.end())
+    return Existing->second;
+  // Otherwise create a child for the site's GUID and register it.
+  auto *Child = new MCPseudoProbeInlineTree(std::get<0>(Site));
+  Inlinees[Site] = Child;
+  return Child;
+}
+
+void MCPseudoProbeInlineTree::addPseudoProbe(
+    const MCPseudoProbe &Probe, const MCPseudoProbeInlineStack &InlineStack) {
+  // Tree paths are built from the root down, so this must run on the root.
+  assert(isRoot() && "Should only be called on root");
+
+  // When it comes here, the input looks like:
+  //    Probe: GUID of C, ...
+  //    InlineStack: [88, A], [66, B]
+  // which means, Function A inlines function B at call site with a probe id of
+  // 88, and B inlines C at probe 66. The tri-tree expects a tree path like {[0,
+  // A], [88, B], [66, C]} to locate the tree node where the probe should be
+  // added. Note that the edge [0, A] means A is the top-level function we are
+  // emitting probes for.
+
+  // Make a [0, A] edge.
+  // An empty inline stack means the function that the probe originates from
+  // is a top-level function.
+  InlineSite Top;
+  if (InlineStack.empty()) {
+    Top = InlineSite(Probe.getGuid(), 0);
+  } else {
+    Top = InlineSite(std::get<0>(InlineStack.front()), 0);
+  }
+
+  auto *Cur = getOrAddNode(Top);
+
+  // Make interior edges by walking the inline stack. Once it's done, Cur should
+  // point to the node that the probe originates from.
+  if (!InlineStack.empty()) {
+    auto Iter = InlineStack.begin();
+    auto Index = std::get<1>(*Iter);
+    ++Iter;
+    for (; Iter != InlineStack.end(); ++Iter) {
+      // Make an edge by using the previous probe id and current GUID.
+      Cur = Cur->getOrAddNode(InlineSite(std::get<0>(*Iter), Index));
+      Index = std::get<1>(*Iter);
+    }
+    Cur = Cur->getOrAddNode(InlineSite(Probe.getGuid(), Index));
+  }
+
+  Cur->Probes.push_back(Probe);
+}
+
+void MCPseudoProbeInlineTree::emit(MCObjectStreamer *MCOS,
+                                   const MCPseudoProbe *&LastProbe) {
+  LLVM_DEBUG({
+    dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
+    dbgs() << "Group [\n";
+    MCPseudoProbeTable::DdgPrintIndent += 2;
+  });
+  // Emit this node's header and probes; a node with Guid 0 carries neither.
+  if (Guid != 0) {
+    LLVM_DEBUG({
+      dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
+      dbgs() << "GUID: " << Guid << "\n";
+    });
+    // Emit Guid as a 64-bit value
+    MCOS->emitInt64(Guid);
+    // Emit number of probes in this node
+    MCOS->emitULEB128IntValue(Probes.size());
+    // Emit number of direct inlinees
+    MCOS->emitULEB128IntValue(Inlinees.size());
+    // Emit probes in this group; each one becomes LastProbe for the next
+    for (const auto &Probe : Probes) {
+      Probe.emit(MCOS, LastProbe);
+      LastProbe = &Probe;
+    }
+  } else {
+    assert(Probes.empty() && "Root should not have probes");
+  }
+
+  // Emit descendants
+  for (const auto &Inlinee : Inlinees) {
+    if (Guid) {
+      // Emit the probe index stored in the inline site key
+      MCOS->emitULEB128IntValue(std::get<1>(Inlinee.first));
+      LLVM_DEBUG({
+        dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
+        dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n";
+      });
+    }
+    // Emit the child group, passing the same LastProbe reference along
+    Inlinee.second->emit(MCOS, LastProbe);
+  }
+
+  LLVM_DEBUG({
+    MCPseudoProbeTable::DdgPrintIndent -= 2;
+    dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
+    dbgs() << "]\n";
+  });
+}
+
+void MCPseudoProbeSection::emit(MCObjectStreamer *MCOS) {
+  const auto *MOFI = MCOS->getContext().getObjectFileInfo();
+  for (auto &Division : MCProbeDivisions) {
+    auto *ProbeSection = MOFI->getPseudoProbeSection(Division.first);
+    if (!ProbeSection)
+      continue;
+    // Every division starts without a previous probe to take a delta from.
+    const MCPseudoProbe *LastProbe = nullptr;
+    // Switch to the .pseudo_probe section or its comdat variant.
+    MCOS->SwitchSection(ProbeSection);
+    // Emit the inline tree of this division, probes grouped by GUID.
+    Division.second.emit(MCOS, LastProbe);
+  }
+}
+
+//
+// Emit every pseudo probe recorded in the MCContext of the given streamer.
+//
+void MCPseudoProbeTable::emit(MCObjectStreamer *MCOS) {
+  auto &Sections =
+      MCOS->getContext().getMCPseudoProbeTable().getProbeSections();
+
+  // Nothing was recorded: return before any section switch happens so that
+  // no empty pseudo probe section is created.
+  if (Sections.empty())
+    return;
+
+  // Reset the indentation used by the debug dump.
+  LLVM_DEBUG(MCPseudoProbeTable::DdgPrintIndent = 0);
+
+  // Emit the probes of all sections.
+  Sections.emit(MCOS);
+}
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
--- a/llvm/lib/MC/MCStreamer.cpp
+++ b/llvm/lib/MC/MCStreamer.cpp
@@ -22,6 +22,7 @@
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstPrinter.h"
 #include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCPseudoProbe.h"
 #include "llvm/MC/MCRegister.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSection.h"
@@ -1042,6 +1043,25 @@
       visitUsedExpr(*Inst.getOperand(i).getExpr());
 }
 
+void MCStreamer::emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
+                                 uint64_t Attr,
+                                 const MCPseudoProbeInlineStack &InlineStack) {
+  auto &Ctx = getContext();
+
+  // Drop a fresh temporary label at the current position; the probe uses it
+  // as its code address.
+  MCSymbol *AddrLabel = Ctx.createTempSymbol();
+  emitLabel(AddrLabel);
+
+  // Build the probe entry around that label.
+  MCPseudoProbe Entry(AddrLabel, Guid, Index, Type, Attr);
+
+  // File the entry, together with its inline context, under the section
+  // that is currently being emitted into.
+  auto &Sections = Ctx.getMCPseudoProbeTable().getProbeSections();
+  Sections.addPseudoProbe(getCurrentSectionOnly(), Entry, InlineStack);
+}
+
 void MCStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo,
                                         unsigned Size) {
   // Get the Hi-Lo expression.
diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -35,12 +35,46 @@
 STATISTIC(ArtificialDbgLine,
           "Number of probes that have an artificial debug line");
 
-SampleProfileProber::SampleProfileProber(Function &Func) : F(&Func) {
+SampleProfileProber::SampleProfileProber(Function &Func,
+                                         const std::string &CurModuleUniqueId)
+    : F(&Func), CurModuleUniqueId(CurModuleUniqueId) {
   BlockProbeIds.clear();
   CallProbeIds.clear();
   LastProbeId = (uint32_t)PseudoProbeReservedId::Last;
   computeProbeIdForBlocks();
   computeProbeIdForCallsites();
+  computeCFGHash();
+}
+
+// Compute the CFG hash. The low 32 bits are the JamCRC of the block ids of
+// all CFG edge targets; the byte count fed to the CRC (four per edge) is OR'ed
+// in at bit 32 and CallProbeIds.size() at bit 48. Bits 60-63 are cleared.
+// This is derived from FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash().
+void SampleProfileProber::computeCFGHash() {
+  std::vector<uint8_t> Indexes;
+  JamCRC JC;
+  for (auto &BB : *F) {
+    auto *TI = BB.getTerminator();
+    for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
+      auto *Succ = TI->getSuccessor(I);
+      auto Index = getBlockId(Succ);
+      for (int J = 0; J < 4; J++)
+        Indexes.push_back((uint8_t)(Index >> (J * 8)));
+    }
+  }
+
+  JC.update(Indexes);
+
+  FunctionHash = (uint64_t)CallProbeIds.size() << 48 |
+                 (uint64_t)Indexes.size() << 32 | JC.getCRC();
+  // Reserve bit 60-63 for other information purpose.
+  FunctionHash &= 0x0FFFFFFFFFFFFFFF;
+  assert(FunctionHash && "Function checksum should not be zero");
+  LLVM_DEBUG(dbgs() << "\nFunction Hash Computation for " << F->getName()
+                    << ":\n"
+                    << " CRC = " << JC.getCRC() << ", Edges = "
+                    << Indexes.size() << ", ICSites = " << CallProbeIds.size()
+                    << ", Hash = " << FunctionHash << "\n");
+}
 
 void SampleProfileProber::computeProbeIdForBlocks() {
@@ -150,14 +184,50 @@
       Call->setDebugLoc(DIL);
     }
   }
+
+  // Create module-level metadata that contains function info necessary to
+  // synthesize probe-based sample counts,  which are
+  // - FunctionGUID
+  // - FunctionHash.
+  // - FunctionName
+  auto Hash = getFunctionHash();
+  auto *MD = MDB.createPseudoProbeDesc(Guid, Hash, &F);
+  auto *NMD = M->getNamedMetadata(PseudoProbeDescMetadataName);
+  assert(NMD && "llvm.pseudo_probe_desc should be pre-created");
+  NMD->addOperand(MD);
+
+  // Preserve a comdat group to hold all probes materialized later. This
+  // allows that when the function is considered dead and removed, the
+  // materialized probes are disposed too.
+  // Imported functions are defined in another module. They do not need
+  // the following handling since same care will be taken for them in their
+  // original module. The pseudo probes inserted into an imported functions
+  // above will naturally not be emitted since the imported function is free
+  // from object emission. However they will be emitted together with the
+  // inliner functions that the imported function is inlined into. We are not
+  // creating a comdat group for an import function since it's useless anyway.
+  if (!F.isDeclarationForLinker()) {
+    if (TM) {
+      auto Triple = TM->getTargetTriple();
+      if (Triple.supportsCOMDAT() && TM->getFunctionSections()) {
+        GetOrCreateFunctionComdat(F, Triple, CurModuleUniqueId);
+      }
+    }
+  }
 }
 
 PreservedAnalyses SampleProfileProbePass::run(Module &M,
                                               ModuleAnalysisManager &AM) {
+  auto ModuleId = getUniqueModuleId(&M);
+  // Create the pseudo probe desc metadata beforehand.
+  // Note that modules with only data but no functions will require this to
+  // be set up so that they will be known as probed later.
+  M.getOrInsertNamedMetadata(PseudoProbeDescMetadataName);
+
   for (auto &F : M) {
     if (F.isDeclaration())
       continue;
-    SampleProfileProber ProbeManager(F);
+    SampleProfileProber ProbeManager(F, ModuleId);
     ProbeManager.instrumentOneFunc(F, TM);
   }
 
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll
@@ -0,0 +1,95 @@
+; REQUIRES: x86_64-linux
+; RUN: opt < %s -passes='pseudo-probe,cgscc(inline)' -function-sections -mtriple=x86_64-unknown-linux-gnu -S -o %t
+; RUN: FileCheck %s < %t --check-prefix=CHECK-IL
+; RUN: llc -pseudo-probe-for-profiling -function-sections <%t -filetype=asm -o %t1
+; RUN: FileCheck %s < %t1 --check-prefix=CHECK-ASM
+; RUN: llc -pseudo-probe-for-profiling -function-sections <%t -filetype=obj -o %t2
+; RUN: llvm-objdump --section-headers  %t2 | FileCheck %s --check-prefix=CHECK-OBJ
+; RUN: llvm-mc -filetype=asm <%t1 -o %t3
+; RUN: FileCheck %s < %t3 --check-prefix=CHECK-ASM
+; RUN: llvm-mc -filetype=obj <%t1 -o %t4
+; RUN: llvm-objdump --section-headers  %t4 | FileCheck %s --check-prefix=CHECK-OBJ
+
+define dso_local void @foo2() !dbg !7 {
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0), !dbg ![[#]]
+; CHECK-ASM: .pseudoprobe	[[#GUID1:]] 1 0 0
+  ret void, !dbg !10
+}
+
+define dso_local void @foo() #0 !dbg !11 {
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0), !dbg ![[#]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0), !dbg ![[#DL1:]]
+; CHECK-ASM: .pseudoprobe	[[#GUID2:]] 1 0 0
+; CHECK-ASM: .pseudoprobe	[[#GUID1]] 1 0 0 @ [[#GUID2]]:2
+  call void @foo2(), !dbg !12
+  ret void, !dbg !13
+}
+
+define dso_local i32 @entry() !dbg !14 {
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0), !dbg ![[#]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0), !dbg ![[#DL2:]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0), !dbg ![[#DL3:]]
+; CHECK-ASM: .pseudoprobe	[[#GUID3:]] 1 0 0
+; CHECK-ASM: .pseudoprobe	[[#GUID2]] 1 0 0 @ [[#GUID3]]:2
+; CHECK-ASM: .pseudoprobe	[[#GUID1]] 1 0 0 @ [[#GUID3]]:2 @ [[#GUID2]]:2
+  call void @foo(), !dbg !18
+  ret i32 0, !dbg !19
+}
+
+
+; CHECK-IL: ![[#SCOPE1:]] = distinct !DISubprogram(name: "foo2"
+; CHECK-IL: ![[#SCOPE2:]] = distinct !DISubprogram(name: "foo"
+; CHECK-IL: ![[#DL1]] = !DILocation(line: 3, column: 1,  scope: ![[#SCOPE1]], inlinedAt: ![[#INL1:]])
+; CHECK-IL: ![[#INL1]] = distinct !DILocation(line: 7, column: 3, scope: ![[#BL1:]])
+;; A discriminator of 134217751, which is 0x8000017 in hexadecimal, stands for a direct call probe
+;; with an index of 2.
+; CHECK-IL: ![[#BL1]] = !DILexicalBlockFile(scope: ![[#SCOPE2]], file: !1, discriminator: 134217751)
+; CHECK-IL: ![[#SCOPE3:]] = distinct !DISubprogram(name: "entry"
+; CHECK-IL: ![[#DL2]] = !DILocation(line: 7, column: 3,  scope: ![[#SCOPE2]], inlinedAt: ![[#INL2:]])
+; CHECK-IL: ![[#INL2]] = distinct !DILocation(line: 11, column: 3, scope: ![[#BL2:]])
+; CHECK-IL: ![[#BL2]] = !DILexicalBlockFile(scope: ![[#SCOPE3]], file: !1, discriminator: 134217751)
+; CHECK-IL: ![[#DL3]] = !DILocation(line: 3, column: 1,  scope: ![[#SCOPE1]], inlinedAt: ![[#INL3:]])
+; CHECK-IL: ![[#INL3]] = distinct !DILocation(line: 7, column: 3,  scope: ![[#BL1]], inlinedAt: ![[#INL2]])
+
+
+; Check the generation of .pseudo_probe_desc section
+; CHECK-ASM: .section .pseudo_probe_desc,"G",@progbits,.pseudo_probe_desc_foo2,comdat
+; CHECK-ASM-NEXT: .quad [[#GUID1]]
+; CHECK-ASM-NEXT: .quad [[#HASH1:]]
+; CHECK-ASM-NEXT: .byte	4
+; CHECK-ASM-NEXT: .ascii "foo2"
+; CHECK-ASM-NEXT: .section .pseudo_probe_desc,"G",@progbits,.pseudo_probe_desc_foo,comdat
+; CHECK-ASM-NEXT: .quad [[#GUID2]]
+; CHECK-ASM-NEXT: .quad [[#HASH2:]]
+; CHECK-ASM-NEXT: .byte	3
+; CHECK-ASM-NEXT: .ascii "foo"
+; CHECK-ASM-NEXT: .section .pseudo_probe_desc,"G",@progbits,.pseudo_probe_desc_entry,comdat
+; CHECK-ASM-NEXT: .quad [[#GUID3]]
+; CHECK-ASM-NEXT: .quad [[#HASH3:]]
+; CHECK-ASM-NEXT: .byte	5
+; CHECK-ASM-NEXT: .ascii "entry"
+
+; CHECK-OBJ: .pseudo_probe_desc
+; CHECK-OBJ: .pseudo_probe
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: FullDebug)
+!1 = !DIFile(filename: "foo.c", directory: "any")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!7 = distinct !DISubprogram(name: "foo2", scope: !1, file: !1, line: 2, type: !8, scopeLine: 2, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null}
+!10 = !DILocation(line: 3, column: 1, scope: !7)
+!11 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 6, type: !8, scopeLine: 6, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!12 = !DILocation(line: 7, column: 3, scope: !11)
+!13 = !DILocation(line: 8, column: 1, scope: !11)
+!14 = distinct !DISubprogram(name: "entry", scope: !1, file: !1, line: 10, type: !15, scopeLine: 10, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!15 = !DISubroutineType(types: !16)
+!16 = !{!17}
+!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!18 = !DILocation(line: 11, column: 3, scope: !14)
+!19 = !DILocation(line: 12, column: 3, scope: !14)
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll
@@ -1,8 +1,14 @@
-; REQUIRES: x86-registered-target
+; REQUIRES: x86_64-linux
 ; RUN: opt < %s -passes=pseudo-probe -function-sections -S -o %t
 ; RUN: FileCheck %s < %t --check-prefix=CHECK-IL
 ; RUN: llc %t -pseudo-probe-for-profiling -stop-after=pseudo-probe-inserter -o - | FileCheck %s --check-prefix=CHECK-MIR
-;
+; RUN: llc %t -pseudo-probe-for-profiling -function-sections -filetype=asm -o %t1
+; RUN: FileCheck %s < %t1 --check-prefix=CHECK-ASM
+; RUN: llc %t -pseudo-probe-for-profiling -function-sections -filetype=obj -o %t2
+; RUN: llvm-objdump --section-headers  %t2 | FileCheck %s --check-prefix=CHECK-OBJ
+; RUN: llvm-mc %t1 -filetype=obj -o %t3
+; RUN: llvm-objdump --section-headers  %t3 | FileCheck %s --check-prefix=CHECK-OBJ
+
 ;; Check the generation of pseudoprobe intrinsic call.
 
 define void @foo(i32 %x) !dbg !3 {
@@ -10,18 +16,23 @@
   %cmp = icmp eq i32 %x, 0
 ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0), !dbg ![[#FAKELINE:]]
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID:]], 1, 0, 0
+; CHECK-ASM: .pseudoprobe	[[#GUID:]] 1 0 0
   br i1 %cmp, label %bb1, label %bb2
 
 bb1:
 ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0), !dbg ![[#FAKELINE]]
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 3, 0, 0
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0
+; CHECK-ASM: .pseudoprobe	[[#GUID]] 3 0 0
+; CHECK-ASM: .pseudoprobe	[[#GUID]] 4 0 0
   br label %bb3
 
 bb2:
 ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0), !dbg ![[#FAKELINE]]
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 2, 0, 0
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0
+; CHECK-ASM: .pseudoprobe	[[#GUID]] 2 0 0
+; CHECK-ASM: .pseudoprobe	[[#GUID]] 4 0 0
   br label %bb3
 
 bb3:
@@ -35,13 +46,16 @@
 entry:
 ; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0)
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID2:]], 1, 0, 0
+; CHECK-ASM: .pseudoprobe	[[#GUID2:]] 1 0 0
 ; Check pseudo_probe metadata attached to the indirect call instruction.
 ; CHECK-IL: call void %f(i32 1), !dbg ![[#PROBE0:]]
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID2]], 2, 1, 0
+; CHECK-ASM: .pseudoprobe	[[#GUID2]] 2 1 0
   call void %f(i32 1), !dbg !13
 ; Check pseudo_probe metadata attached to the direct call instruction.
 ; CHECK-IL: call void @bar(i32 1), !dbg ![[#PROBE1:]]
 ; CHECK-MIR: PSEUDO_PROBE	[[#GUID2]], 3, 2, 0
+; CHECK-ASM: .pseudoprobe	[[#GUID2]] 3 2 0
   call void @bar(i32 1)
   ret void
 }
@@ -58,6 +72,20 @@
 ;; with an index of 3.
 ; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 134217759)
 
+; Check the generation of .pseudo_probe_desc section
+; CHECK-ASM: .section .pseudo_probe_desc,"G",@progbits,.pseudo_probe_desc_foo,comdat
+; CHECK-ASM-NEXT: .quad [[#GUID]]
+; CHECK-ASM-NEXT: .quad [[#HASH:]]
+; CHECK-ASM-NEXT: .byte  3
+; CHECK-ASM-NEXT: .ascii	"foo"
+; CHECK-ASM-NEXT: .section  .pseudo_probe_desc,"G",@progbits,.pseudo_probe_desc_foo2,comdat
+; CHECK-ASM-NEXT: .quad [[#GUID2]]
+; CHECK-ASM-NEXT: .quad [[#HASH2:]]
+; CHECK-ASM-NEXT: .byte 4
+; CHECK-ASM-NEXT: .ascii	"foo2"
+
+; CHECK-OBJ-COUNT-2: .pseudo_probe_desc
+; CHECK-OBJ-COUNT-2: .pseudo_probe
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !10}