Index: include/llvm/Analysis/OptimizationDiagnosticInfo.h
===================================================================
--- include/llvm/Analysis/OptimizationDiagnosticInfo.h
+++ include/llvm/Analysis/OptimizationDiagnosticInfo.h
@@ -16,11 +16,11 @@
 #define LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H
 
 #include "llvm/ADT/Optional.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 
 namespace llvm {
-class BlockFrequencyInfo;
 class DebugLoc;
 class Function;
 class LLVMContext;
@@ -34,6 +34,10 @@
   OptimizationRemarkEmitter(Function *F, BlockFrequencyInfo *BFI)
       : F(F), BFI(BFI) {}
 
+  // This variant can be used to generate ORE on demand (without the analysis
+  // pass).
+  OptimizationRemarkEmitter(Function *F);
+
   OptimizationRemarkEmitter(OptimizationRemarkEmitter &&Arg)
       : F(Arg.F), BFI(Arg.BFI) {}
 
@@ -149,6 +153,9 @@
 
   BlockFrequencyInfo *BFI;
 
+  /// \brief If we generate BFI on demand, we need to free it when ORE is freed.
+  std::unique_ptr<BlockFrequencyInfo> OwnedBFI;
+
   Optional<uint64_t> computeHotness(const Value *V);
 
   OptimizationRemarkEmitter(const OptimizationRemarkEmitter &) = delete;
Index: include/llvm/Transforms/IPO/InlinerPass.h
===================================================================
--- include/llvm/Transforms/IPO/InlinerPass.h
+++ include/llvm/Transforms/IPO/InlinerPass.h
@@ -24,6 +24,7 @@
 class CallSite;
 class DataLayout;
 class InlineCost;
+class OptimizationRemarkEmitter;
 class ProfileSummaryInfo;
 template <class PtrType, unsigned SmallSize> class SmallPtrSet;
 
@@ -75,7 +76,7 @@
 
   /// shouldInline - Return true if the inliner should attempt to
   /// inline at the given CallSite.
-  bool shouldInline(CallSite CS);
+  bool shouldInline(CallSite CS, OptimizationRemarkEmitter &ORE);
   /// Return true if inlining of CS can block the caller from being
   /// inlined which is proved to be more beneficial. \p IC is the
   /// estimated inline cost associated with callsite \p CS.
Index: lib/Analysis/OptimizationDiagnosticInfo.cpp
===================================================================
--- lib/Analysis/OptimizationDiagnosticInfo.cpp
+++ lib/Analysis/OptimizationDiagnosticInfo.cpp
@@ -13,13 +13,37 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/LLVMContext.h"
 
 using namespace llvm;
 
+OptimizationRemarkEmitter::OptimizationRemarkEmitter(Function *F)
+    : F(F), BFI(nullptr) {
+  if (!F->getContext().getDiagnosticHotnessRequested())
+    return;
+
+  // First create a dominator tree.
+  DominatorTree DT;
+  DT.recalculate(*F);
+
+  // Generate LoopInfo from it.
+  LoopInfo LI;
+  LI.analyze(DT);
+
+  // Then compute BranchProbabilityInfo.
+  BranchProbabilityInfo BPI;
+  BPI.calculate(*F, LI);
+
+  // Finally compute BFI.
+  OwnedBFI = llvm::make_unique<BlockFrequencyInfo>(*F, BPI, LI);
+  BFI = OwnedBFI.get();
+}
+
 Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) {
   if (!BFI)
     return None;
Index: lib/Transforms/IPO/Inliner.cpp
===================================================================
--- lib/Transforms/IPO/Inliner.cpp
+++ lib/Transforms/IPO/Inliner.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/CallSite.h"
@@ -222,11 +223,9 @@
   return true;
 }
 
-static void emitAnalysis(CallSite CS, const Twine &Msg) {
-  Function *Caller = CS.getCaller();
-  LLVMContext &Ctx = Caller->getContext();
-  DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
-  emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
+static void emitAnalysis(CallSite CS, OptimizationRemarkEmitter &ORE,
+                         const Twine &Msg) {
+  ORE.emitOptimizationRemarkAnalysis(DEBUG_TYPE, CS.getInstruction(), Msg);
 }
 
 bool Inliner::shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
@@ -300,22 +299,22 @@
 }
 
 /// Return true if the inliner should attempt to inline at the given CallSite.
-bool Inliner::shouldInline(CallSite CS) {
+bool Inliner::shouldInline(CallSite CS, OptimizationRemarkEmitter &ORE) {
   InlineCost IC = getInlineCost(CS);
   
   if (IC.isAlways()) {
     DEBUG(dbgs() << "    Inlining: cost=always"
           << ", Call: " << *CS.getInstruction() << "\n");
-    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) +
-                         " should always be inlined (cost=always)");
+    emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName()) +
+                              " should always be inlined (cost=always)");
     return true;
   }
   
   if (IC.isNever()) {
     DEBUG(dbgs() << "    NOT Inlining: cost=never"
           << ", Call: " << *CS.getInstruction() << "\n");
-    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
-                           " should never be inlined (cost=never)"));
+    emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName() +
+                                " should never be inlined (cost=never)"));
     return false;
   }
   
@@ -324,10 +323,10 @@
     DEBUG(dbgs() << "    NOT Inlining: cost=" << IC.getCost()
           << ", thres=" << (IC.getCostDelta() + IC.getCost())
           << ", Call: " << *CS.getInstruction() << "\n");
-    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
-                           " too costly to inline (cost=") +
-                         Twine(IC.getCost()) + ", threshold=" +
-                         Twine(IC.getCostDelta() + IC.getCost()) + ")");
+    emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName() +
+                                " too costly to inline (cost=") +
+                              Twine(IC.getCost()) + ", threshold=" +
+                              Twine(IC.getCostDelta() + IC.getCost()) + ")");
     return false;
   }
 
@@ -336,20 +335,22 @@
     DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction()
           << " Cost = " << IC.getCost()
           << ", outer Cost = " << TotalSecondaryCost << '\n');
-    emitAnalysis(CS, Twine("Not inlining. Cost of inlining " +
-                           CS.getCalledFunction()->getName() +
-                           " increases the cost of inlining " +
-                           CS.getCaller()->getName() + " in other contexts"));
+    emitAnalysis(CS, ORE,
+                 Twine("Not inlining. Cost of inlining " +
+                       CS.getCalledFunction()->getName() +
+                       " increases the cost of inlining " +
+                       CS.getCaller()->getName() + " in other contexts"));
     return false;
   }
 
   DEBUG(dbgs() << "    Inlining: cost=" << IC.getCost()
         << ", thres=" << (IC.getCostDelta() + IC.getCost())
         << ", Call: " << *CS.getInstruction() << '\n');
-  emitAnalysis(
-      CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") +
-              CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) +
-              " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")");
+  emitAnalysis(CS, ORE, CS.getCalledFunction()->getName() +
+                            Twine(" can be inlined into ") +
+                            CS.getCaller()->getName() + " with cost=" +
+                            Twine(IC.getCost()) + " (threshold=" +
+                            Twine(IC.getCostDelta() + IC.getCost()) + ")");
   return true;
 }
 
@@ -478,36 +479,39 @@
         if (InlineHistoryID != -1 &&
             InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
           continue;
-        
-        LLVMContext &CallerCtx = Caller->getContext();
 
         // Get DebugLoc to report. CS will be invalid after Inliner.
         DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+        BasicBlock *Block = CS.getParent();
+        // FIXME for new PM: because of the old PM we currently generate ORE and
+        // in turn BFI on demand.  With the new PM, the ORE dependency should
+        // just become a regular analysis dependency.
+        OptimizationRemarkEmitter ORE(Caller);
 
         // If the policy determines that we should inline this function,
         // try to do so.
-        if (!shouldInline(CS)) {
-          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
-                                       Twine(Callee->getName() +
-                                             " will not be inlined into " +
-                                             Caller->getName()));
+        if (!shouldInline(CS, ORE)) {
+          ORE.emitOptimizationRemarkMissed(DEBUG_TYPE, DLoc, Block,
+                                           Twine(Callee->getName() +
+                                                 " will not be inlined into " +
+                                                 Caller->getName()));
           continue;
         }
 
         // Attempt to inline the function.
         if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas,
                                   InlineHistoryID, InsertLifetime)) {
-          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
-                                       Twine(Callee->getName() +
-                                             " will not be inlined into " +
-                                             Caller->getName()));
+          ORE.emitOptimizationRemarkMissed(DEBUG_TYPE, DLoc, Block,
+                                           Twine(Callee->getName() +
+                                                 " will not be inlined into " +
+                                                 Caller->getName()));
           continue;
         }
         ++NumInlined;
 
         // Report the inline decision.
-        emitOptimizationRemark(
-            CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+        ORE.emitOptimizationRemark(
+            DEBUG_TYPE, DLoc, Block,
             Twine(Callee->getName() + " inlined into " + Caller->getName()));
 
         // If inlining this function gave us any new call sites, throw them
Index: test/Transforms/Inline/optimization-remarks-with-hotness.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/optimization-remarks-with-hotness.ll
@@ -0,0 +1,63 @@
+; RUN: opt < %s -inline -pass-remarks=inline -pass-remarks-missed=inline -pass-remarks-analysis=inline -pass-remarks-with-hotness -S 2>&1 | FileCheck %s
+
+; CHECK: foo should always be inlined (cost=always) (hotness: 30)
+; CHECK: foo inlined into bar (hotness: 30)
+; CHECK: foz should never be inlined (cost=never) (hotness: 30)
+; CHECK: foz will not be inlined into bar (hotness: 30)
+
+; Function Attrs: alwaysinline nounwind uwtable
+define i32 @foo(i32 %x, i32 %y) #0 !prof !1 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %0 = load i32, i32* %x.addr, align 4
+  %1 = load i32, i32* %y.addr, align 4
+  %add = add nsw i32 %0, %1
+  ret i32 %add
+}
+
+; Function Attrs: noinline nounwind uwtable
+define float @foz(i32 %x, i32 %y) #1 !prof !2 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %0 = load i32, i32* %x.addr, align 4
+  %1 = load i32, i32* %y.addr, align 4
+  %mul = mul nsw i32 %0, %1
+  %conv = sitofp i32 %mul to float
+  ret float %conv
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @bar(i32 %j) #2 !prof !3 {
+entry:
+  %j.addr = alloca i32, align 4
+  store i32 %j, i32* %j.addr, align 4
+  %0 = load i32, i32* %j.addr, align 4
+  %1 = load i32, i32* %j.addr, align 4
+  %sub = sub nsw i32 %1, 2
+  %call = call i32 @foo(i32 %0, i32 %sub)
+  %conv = sitofp i32 %call to float
+  %2 = load i32, i32* %j.addr, align 4
+  %sub1 = sub nsw i32 %2, 2
+  %3 = load i32, i32* %j.addr, align 4
+  %call2 = call float @foz(i32 %sub1, i32 %3)
+  %mul = fmul float %conv, %call2
+  %conv3 = fptosi float %mul to i32
+  ret i32 %conv3
+}
+
+attributes #0 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { noinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.5.0 "}
+!1 = !{!"function_entry_count", i64 10}
+!2 = !{!"function_entry_count", i64 20}
+!3 = !{!"function_entry_count", i64 30}