diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -589,27 +589,24 @@
 }
 
 void CSProfileGenerator::computeSizeForProfiledFunctions() {
-  // Hash map to deduplicate the function range and the item is a pair of
-  // function start and end offset.
-  std::unordered_map<uint64_t, uint64_t> AggregatedRanges;
+  std::unordered_set<const BinaryFunction *> ProfiledFunctions;
+
   // Go through all the ranges in the CS counters, use the start of the range to
-  // look up the function it belongs and record the function range.
+  // look up the function it belongs to and record the function.
   for (const auto &CI : SampleCounters) {
     for (const auto &Item : CI.second.RangeCounter) {
       // FIXME: Filter the bogus crossing function range.
       uint64_t StartOffset = Item.first.first;
-      // Note that a function can be spilt into multiple ranges, so get all
-      // ranges of the function.
-      for (const auto &Range : Binary->getRangesForOffset(StartOffset))
-        AggregatedRanges[Range.first] = Range.second;
+      if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset))
+        ProfiledFunctions.insert(FRange->Func);
     }
   }
 
-  for (const auto &I : AggregatedRanges) {
-    uint64_t StartOffset = I.first;
-    uint64_t EndOffset = I.second;
-    Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset);
-  }
+  for (auto *Func : ProfiledFunctions)
+    Binary->computeInlinedContextSizeForFunc(Func);
+
+  // Flush the symbolizer to save memory.
+  Binary->flushSymbolizer();
 }
 
 void CSProfileGenerator::generateLineNumBasedProfile() {
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -166,14 +166,14 @@
   // their remaining probes.
   void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder);
 
-  void dump() { RootContext.dumpTree(); }
-
-private:
   using ProbeFrameStack = SmallVector<std::pair<StringRef, uint32_t>>;
   void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder,
                               MCDecodedPseudoProbeInlineTree &ProbeNode,
                               ProbeFrameStack &Context);
 
+  void dump() { RootContext.dumpTree(); }
+
+private:
   // Root node for context trie tree, node that this is a reverse context trie
   // with callee as parent and caller as child. This way we can traverse from
   // root to find the best/longest matching context if an exact match does not
@@ -256,6 +256,9 @@
   // Pseudo probe decoder
   MCPseudoProbeDecoder ProbeDecoder;
 
+  // Function name to probe frame map for top-level outlined functions.
+  StringMap<MCDecodedPseudoProbeInlineTree *> TopLevelProbeFrameMap;
+
   bool UsePseudoProbes = false;
 
   bool UseFSDiscriminator = false;
@@ -477,6 +480,8 @@
     return Stack.back();
   }
 
+  void flushSymbolizer() { Symbolizer->flush(); }
+
   // Compare two addresses' inline context
   bool inlineContextEqual(uint64_t Add1, uint64_t Add2);
 
@@ -491,6 +496,8 @@
   void computeInlinedContextSizeForRange(uint64_t StartOffset,
                                          uint64_t EndOffset);
 
+  void computeInlinedContextSizeForFunc(const BinaryFunction *Func);
+
   const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
     return ProbeDecoder.getCallProbeForAddr(Address);
   }
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -219,10 +219,6 @@
   // Disassemble the text sections.
   disassemble(Obj);
 
-  // Track size for optimized inlinees when probe is available
-  if (UsePseudoProbes && TrackFuncContextSize)
-    FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder);
-
   // Use function start and return address to infer prolog and epilog
   ProEpilogTracker.inferPrologOffsets(StartOffset2FuncRangeMap);
   ProEpilogTracker.inferEpilogOffsets(RetOffsets);
@@ -349,6 +345,17 @@
     }
   }
 
+  // Build TopLevelProbeFrameMap to track size for optimized-away inlinees when
+  // probes are available.
+  if (UsePseudoProbes && TrackFuncContextSize) {
+    for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) {
+      auto *Frame = Child.second.get();
+      StringRef FuncName =
+          ProbeDecoder.getFuncDescForGUID(Frame->Guid)->FuncName;
+      TopLevelProbeFrameMap[FuncName] = Frame;
+    }
+  }
+
   if (ShowPseudoProbe)
     ProbeDecoder.printGUID2FuncDescMap(outs());
 }
@@ -747,6 +754,26 @@
   } while (IP.advance() && IP.Address < RangeEnd);
 }
 
+void ProfiledBinary::computeInlinedContextSizeForFunc(
+    const BinaryFunction *Func) {
+  // Note that a function can be split into multiple ranges, so compute for all
+  // ranges of the function.
+  for (const auto &Range : Func->Ranges)
+    computeInlinedContextSizeForRange(Range.first, Range.second);
+
+  // Track optimized-away inlinees for a probed binary. A function that was
+  // inlined and then optimized away should still have its probes left in place.
+  if (usePseudoProbes()) {
+    auto I = TopLevelProbeFrameMap.find(Func->FuncName);
+    // Nothing to track if the function has no top-level probe frame.
+    if (I != TopLevelProbeFrameMap.end()) {
+      BinarySizeContextTracker::ProbeFrameStack ProbeContext;
+      FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder, *I->second,
+                                                 ProbeContext);
+    }
+  }
+}
+
 InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
                                        uint64_t Address, bool RoundToNext)
     : Binary(Binary), Address(Address) {